diff --git a/src/seclab_taskflows/mcp_servers/codeql_python/README.md b/src/seclab_taskflows/mcp_servers/codeql_python/README.md new file mode 100644 index 0000000..d8c0f4a --- /dev/null +++ b/src/seclab_taskflows/mcp_servers/codeql_python/README.md @@ -0,0 +1,34 @@ +Queries in support of the CodeQL MCP Server are maintained as query packs. + +If you add your own queries, please follow established conventions for normal CodeQL query pack development. + +To run the CodeQL for Python server: +- create a codespace, preferably with more cores +- install CodeQL extension for VS Code +- press `Ctrl/Cmd + Shift + P` and type "CodeQL: Install Pack Dependencies". Choose "sylwia-budzynska/mcp-python" and press "OK". +- find the path to the codeql binary, which comes preinstalled with the VS Code CodeQL extension, with the command: +```bash +find ~ -type f -name codeql -executable 2>/dev/null +``` +It will most likely look similar to this: +``` +/home/codespace/.vscode-remote/data/User/globalStorage/github.vscode-codeql/distribution1/codeql/codeql +``` +- create a folder named 'data' +- create or update your `.env` file in the root of this project with values for: +``` +COPILOT_TOKEN= # a fine-grained GitHub personal access token with permission for "copilot chat" +CODEQL_DBS_BASE_PATH="/workspaces/seclab-taskflows/data/codeql_databases" # path to folder with your CodeQL databases + +# Example values for a local setup, run with `python -m seclab_taskflow_agent -t seclab_taskflows.taskflows.audit.remote_sources_local` +MEMCACHE_STATE_DIR="/workspaces/seclab-taskflows/data" # path to folder for storing the memcache database +DATA_DIR="/workspaces/seclab-taskflows/data" # path to folder for storing the codeql_sqlite databases and all other data +GITHUB_PERSONAL_ACCESS_TOKEN= # can be the same token as COPILOT_TOKEN. Or another one, with access e.g. 
class Source(Base):
    """ORM model for one row of the ``source`` table.

    Each row records a single source location for a repository together
    with optional free-form notes.
    """
    __tablename__ = 'source'

    # Auto-assigned primary key.
    id: Mapped[int] = mapped_column(primary_key=True)
    # Repository identifier the source belongs to.
    repo: Mapped[str]
    # Location (file path) of the source.
    source_location: Mapped[str]
    # Line number of the source.
    line: Mapped[int]
    # Kind of source.
    source_type: Mapped[str]
    # Optional notes; stored as TEXT and may be NULL.
    notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)

    def __repr__(self):
        """Return a debug representation listing every column value."""
        # The previous implementation returned an empty string, which made
        # instances indistinguishable in logs and debugger output.
        return (
            f"<Source(id={self.id}, repo={self.repo!r}, "
            f"source_location={self.source_location!r}, line={self.line}, "
            f"source_type={self.source_type!r}, notes={self.notes!r})>"
        )
def source_to_dict(result):
    """Convert a source record into a plain dictionary.

    The record's ``id`` is exposed under the key ``source_id``; every other
    attribute keeps its own name.
    """
    payload = {"source_id": result.id}
    # Remaining attributes are copied one-to-one, in a fixed order.
    for attr in ("repo", "source_location", "line", "source_type", "notes"):
        payload[attr] = getattr(result, attr)
    return payload
def store_new_source(self, repo, source_location, line, source_type, notes, update = False):
    """Insert a source row, or append notes to a matching existing row.

    A row matches when ``repo``, ``source_location`` and ``line`` are all
    equal to the given values.

    Args:
        repo: Repository identifier the source belongs to.
        source_location: Location (file path) of the source.
        line: Line number of the source.
        source_type: Kind of source; only used when a new row is created.
        notes: Text to append to the row's notes. May be ``None`` or empty,
            in which case an existing row's notes are left untouched.
        update: When true, never create a row; only update an existing one.

    Returns:
        A human-readable status message describing what happened.
    """
    with Session(self.engine) as session:
        existing = session.query(Source).filter_by(
            repo=repo, source_location=source_location, line=line
        ).first()
        if existing:
            # ``notes`` can be None; concatenating it to a str would raise
            # TypeError, so only append when there is something to add.
            if notes:
                existing.notes = (existing.notes or "") + notes
                session.commit()
            return f"Updated notes for source at {source_location}, line {line} in {repo}."
        if update:
            return f"No source exists at repo {repo}, location {source_location}, line {line} to update."
        session.add(Source(
            repo=repo,
            source_location=source_location,
            line=line,
            source_type=source_type,
            notes=notes,
        ))
        session.commit()
        return f"Added new source for {source_location} in {repo}."
+ + def get_sources(self, repo): + with Session(self.engine) as session: + results = session.query(Source).filter_by(repo = repo).all() + sources = [source_to_dict(source) for source in results] + return sources + + +# our query result format is: "human readable template {val0} {val1},'key0,key1',val0,val1" +def _csv_parse(raw): + results = [] + reader = csv.reader(raw.strip().splitlines()) + try: + for i, row in enumerate(reader): + if i == 0: + continue + # col1 has what we care about, but offer flexibility + keys = row[1].split(',') + this_obj = {'description': row[0].format(*row[2:])} + for j, k in enumerate(keys): + this_obj[k.strip()] = row[j + 2] + results.append(this_obj) + except (csv.Error, IndexError, ValueError) as e: + return f"Error: CSV parsing error: {e}" + return results + + +def _run_query(query_name: str, database_path: str, language: str, template_values: dict): + """Run a CodeQL query and return the results""" + + try: + database_path = _resolve_db_path(database_path) + except RuntimeError: + return f"The database path for {database_path} could not be resolved" + try: + query_path = _resolve_query_path(language, query_name) + except RuntimeError: + return f"The query {query_name} is not supported for language: {language}" + try: + csv = run_query(Path(__file__).parent.resolve() / + query_path, + database_path, + fmt='csv', + template_values=template_values, + log_stderr=True) + return _csv_parse(csv) + except Exception as e: + return f"The query {query_name} encountered an error: {e}" + +backend = CodeqlSqliteBackend(MEMORY) + +@mcp.tool() +def remote_sources(owner: str = Field(description="The owner of the GitHub repository"), + repo: str = Field(description="The name of the GitHub repository"), + database_path: str = Field(description="The CodeQL database path."), + language: str = Field(description="The language used for the CodeQL database.")): + """List all remote sources and their locations in a CodeQL database, then store the results in 
@mcp.tool()
def fetch_sources(owner: str = Field(description="The owner of the GitHub repository"),
                  repo: str = Field(description="The name of the GitHub repository")):
    """
    Fetch all sources from the repo
    """
    # Normalize owner/repo into the key used by the backend, then serialize
    # whatever the backend has stored for it.
    repo_key = process_repo(owner, repo)
    stored_sources = backend.get_sources(repo_key)
    return json.dumps(stored_sources)
if __name__ == "__main__":
    # Honor the CODEQL_CLI setting (falls back to `codeql` on PATH when the
    # variable is unset or empty) instead of hard-coding the binary name.
    codeql_cli = os.getenv('CODEQL_CLI') or 'codeql'

    # Check if codeql/python-all pack is installed, if not install it.
    # Look in the original root-level location as well as the current
    # user's CodeQL package cache.
    pack_cache_dirs = (
        Path('/.codeql/packages/codeql/python-all'),
        Path.home() / '.codeql' / 'packages' / 'codeql' / 'python-all',
    )
    if not any(candidate.is_dir() for candidate in pack_cache_dirs):
        pack_path = importlib.resources.files('seclab_taskflows.mcp_servers.codeql_python.queries').joinpath('mcp-python')
        print(f"Installing CodeQL pack from {pack_path}")
        completed = subprocess.run([codeql_cli, "pack", "install", str(pack_path)])
        if completed.returncode != 0:
            # Keep starting the server (best effort), but leave a trace so a
            # later query failure can be tied back to the failed install.
            logging.error(
                "codeql pack install exited with status %s for %s",
                completed.returncode,
                pack_path,
            )
    mcp.run(show_banner=False, transport="http", host="127.0.0.1", port=9998)
false diff --git a/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/codeql-pack.yml b/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/codeql-pack.yml new file mode 100644 index 0000000..b0a2e23 --- /dev/null +++ b/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/codeql-pack.yml @@ -0,0 +1,7 @@ +--- +library: false +warnOnImplicitThis: false +name: sylwia-budzynska/mcp-python +version: 0.0.1 +dependencies: + codeql/python-all: ^4.1.0 \ No newline at end of file diff --git a/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/example.ql b/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/example.ql new file mode 100644 index 0000000..e45af98 --- /dev/null +++ b/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/example.ql @@ -0,0 +1,12 @@ +/** + * This is an automatically generated file + * @name Hello world + * @kind problem + * @problem.severity warning + * @id python/example/hello-world + */ + +import python + +from File f +select f, "Hello, world!" \ No newline at end of file diff --git a/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/remote_sources.ql b/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/remote_sources.ql new file mode 100644 index 0000000..32ea16c --- /dev/null +++ b/src/seclab_taskflows/mcp_servers/codeql_python/queries/mcp-python/remote_sources.ql @@ -0,0 +1,21 @@ +/** + * @id mcp-python/remote-sources + * @name Python Remote Sources + * @description Identifies nodes that act as remote sources in Python code, along with their locations. 
+ * @tags source, location + */ +import python +import semmle.python.dataflow.new.RemoteFlowSources + +// string normalizeLocation(Location l) { +// result = l.getFile().getRelativePath() + ":" + l.getStartLine().toString() + ":" + l.getStartColumn().toString() +// + ":" + l.getEndLine().toString() + ":" + l.getEndColumn().toString() +// } + +from RemoteFlowSource source +select + "Remote source {0} is defined at {1} line {2}", + "source,location,line", + source.getSourceType(), + source.getLocation().getFile().getRelativePath(), + source.getLocation().getStartLine().toString() diff --git a/src/seclab_taskflows/mcp_servers/repo_context.py b/src/seclab_taskflows/mcp_servers/repo_context.py index d4c5917..5bf20dc 100644 --- a/src/seclab_taskflows/mcp_servers/repo_context.py +++ b/src/seclab_taskflows/mcp_servers/repo_context.py @@ -21,6 +21,7 @@ from pathlib import Path from .repo_context_models import Application, EntryPoint, UserAction, WebEntryPoint, ApplicationIssue, AuditResult, Base +from .utils import process_repo MEMORY = Path(os.getenv('REPO_CONTEXT_DIR', default='/app/my_data')) @@ -90,9 +91,9 @@ def __init__(self, memcache_state_dir: str): else: db_dir = f'sqlite:///{self.memcache_state_dir}/repo_context.db' self.engine = create_engine(db_dir, echo=False) - Base.metadata.create_all(self.engine, tables=[Application.__table__, EntryPoint.__table__, UserAction.__table__, + Base.metadata.create_all(self.engine, tables=[Application.__table__, EntryPoint.__table__, UserAction.__table__, WebEntryPoint.__table__, ApplicationIssue.__table__, AuditResult.__table__]) - + def store_new_application(self, repo, location, is_app, is_library, notes): with Session(self.engine) as session: existing = session.query(Application).filter_by(repo = repo, location = location).first() @@ -107,7 +108,7 @@ def store_new_application(self, repo, location, is_app, is_library, notes): session.add(new_application) session.commit() return f"Updated or added application for {location} in 
{repo}." - + def store_new_component_issue(self, repo, component_id, issue_type, notes): with Session(self.engine) as session: existing = session.query(ApplicationIssue).filter_by(repo = repo, component_id = component_id, issue_type = issue_type).first() @@ -155,7 +156,7 @@ def store_new_entry_point(self, repo, app_id, file, user_input, line, notes, upd session.add(new_entry_point) session.commit() return f"Updated or added entry point for {file} and {line} in {repo}." - + def store_new_web_entry_point(self, repo, entry_point_id, method, path, component, auth, middleware, roles_scopes, notes, update = False): with Session(self.engine) as session: existing = session.query(WebEntryPoint).filter_by(repo = repo, entry_point_id = entry_point_id).first() @@ -177,7 +178,7 @@ def store_new_web_entry_point(self, repo, entry_point_id, method, path, componen if update: return f"No web entry point exists at repo {repo} with entry_point_id {entry_point_id}." new_web_entry_point = WebEntryPoint( - repo = repo, + repo = repo, entry_point_id = entry_point_id, method = method, path = path, @@ -190,7 +191,7 @@ def store_new_web_entry_point(self, repo, entry_point_id, method, path, componen session.add(new_web_entry_point) session.commit() return f"Updated or added web entry point for entry_point_id {entry_point_id} in {repo}." - + def store_new_user_action(self, repo, app_id, file, line, notes, update = False): with Session(self.engine) as session: existing = session.query(UserAction).filter_by(repo = repo, file = file, line = line).first() @@ -203,7 +204,7 @@ def store_new_user_action(self, repo, app_id, file, line, notes, update = False) session.add(new_user_action) session.commit() return f"Updated or added user action for {file} and {line} in {repo}." 
- + def get_app(self, repo, location): with Session(self.engine) as session: existing = session.query(Application).filter_by(repo = repo, location = location).first() @@ -271,7 +272,7 @@ def get_web_entries_for_repo(self, repo): with Session(self.engine) as session: results = session.query(WebEntryPoint).filter_by(repo = repo).all() return [{ - 'repo' : r.repo, + 'repo' : r.repo, 'entry_point_id' : r.entry_point_id, 'method' : r.method, 'path' : r.path, @@ -286,7 +287,7 @@ def get_web_entries(self, repo, component_id): with Session(self.engine) as session: results = session.query(WebEntryPoint).filter_by(repo = repo, component = component_id).all() return [{ - 'repo' : r.repo, + 'repo' : r.repo, 'entry_point_id' : r.entry_point_id, 'method' : r.method, 'path' : r.path, @@ -313,7 +314,7 @@ def get_user_actions_for_repo(self, repo): ).filter(UserAction.app_id == Application.id).all() uas = [user_action_to_dict(ua) for app, ua in results] return uas - + def clear_repo(self, repo): with Session(self.engine) as session: session.query(Application).filter_by(repo = repo).delete() @@ -324,7 +325,7 @@ def clear_repo(self, repo): session.query(AuditResult).filter_by(repo = repo).delete() session.commit() return f"Cleared results for repo {repo}" - + def clear_repo_issues(self, repo): with Session(self.engine) as session: session.query(ApplicationIssue).filter_by(repo = repo).delete() @@ -336,13 +337,10 @@ def clear_repo_issues(self, repo): backend = RepoContextBackend(MEMORY) -def process_repo(owner, repo): - return f"{owner}/{repo}".lower() - @mcp.tool() -def store_new_component(owner: str, repo: str, location: str = Field(description="The directory of the component"), - is_app: bool = Field(description="Is this an application", default=None), - is_library: bool = Field(description="Is this a library", default=None), +def store_new_component(owner: str, repo: str, location: str = Field(description="The directory of the component"), + is_app: bool = Field(description="Is 
this an application", default=None), + is_library: bool = Field(description="Is this a library", default=None), notes: str = Field(description="The notes taken for this component", default="")): """ Stores a new component in the database. @@ -386,9 +384,9 @@ def store_new_component_issue(owner: str, repo: str, component_id: int, return backend.store_new_component_issue(repo, component_id, issue_type, notes) @mcp.tool() -def store_new_audit_result(owner: str, repo: str, component_id: int, issue_type: str, issue_id: int, - has_non_security_error: bool = Field(description="Set to true if there are security issues or logic error but may not be exploitable"), - has_vulnerability: bool = Field(description="Set to true if a security vulnerability is identified"), +def store_new_audit_result(owner: str, repo: str, component_id: int, issue_type: str, issue_id: int, + has_non_security_error: bool = Field(description="Set to true if there are security issues or logic error but may not be exploitable"), + has_vulnerability: bool = Field(description="Set to true if a security vulnerability is identified"), notes: str = Field(description="The notes for the audit of this issue")): """ Stores the audit result for issue with issue_id. 
def process_repo(owner, repo):
    """
    Build the lowercase 'owner/repo' identifier for a repository.

    Args:
        owner (str): The owner of the repository.
        repo (str): The name of the repository.

    Returns:
        str: ``owner`` and ``repo`` joined by a slash, lowercased.
    """
    identifier = "{}/{}".format(owner, repo)
    return identifier.lower()
+ toolboxes: + - seclab_taskflow_agent.toolboxes.memcache + - seclab_taskflows.toolboxes.codeql_python + - task: + model: general_tasks + must_complete: true + headless: true + agents: + - seclab_taskflow_agent.personalities.assistant + user_prompt: | + For the repo {{ GLOBALS_repo }} fetch the Python CodeQL database and find all remote flow sources using CodeQL. + toolboxes: + - seclab_taskflows.toolboxes.gh_code_scanning + - seclab_taskflows.toolboxes.codeql_python + - task: + must_complete: true + exclude_from_context: true + model: general_tasks + agents: + - seclab_taskflow_agent.personalities.assistant + user_prompt: | + Fetch the zipball of the repository {{ GLOBALS_repo }}. + toolboxes: + - seclab_taskflows.toolboxes.local_gh_resources + - task: + must_complete: true + exclude_from_context: true + model: general_tasks + agents: + - seclab_taskflow_agent.personalities.assistant + user_prompt: | + Fetch the sources from the repo {{ GLOBALS_repo }}. + toolboxes: + - seclab_taskflows.toolboxes.codeql_python + - task: + model: code_analysis + must_complete: false + repeat_prompt: true + max_steps: 100 + name: source analysis + description: Identify actions that untrusted users are allowed to perform on the source. + agents: + - seclab_taskflows.personalities.auditor + user_prompt: | + The source is a {{ RESULT_source_type }} in {{ RESULT_repo }} in the location {{ RESULT_source_location }} on line {{ RESULT_line }}. + If the source is in a folder relating to tests or demo code, skip the analysis and update the source entry in the codeql_sqlite database indicating it is not relevant. + Analyze what the source endpoint is used for. + If it is a web endpoint, identify the routing path that reaches this source, HTTP method, + any middlewares used, which roles are allowed to call it. + Note which kind of authentication is required for that endpoint. + It is possible that the source does not require any authentication. + If authorization is required, note the details. 
+ + Update the source entry in the codeql_sqlite database with your findings. + ## IMPORTANT: General Guidance that ALWAYS applies + + 1. Do NOT ask the user for permission to perform next steps, continue your + analysis autonomously until it is complete. + + 2. Do NOT use 'fetch_sources' or 'remote_sources' tools. + + 3. Do NOT speculate. If you do not have access to the information you need, respond with + the error you encountered. + toolboxes: + - seclab_taskflows.toolboxes.codeql_python + - seclab_taskflows.toolboxes.local_file_viewer + - task: + must_complete: true + agents: + - seclab_taskflows.personalities.auditor + model: code_analysis + user_prompt: | + Fetch the sources of the repo {{ GLOBALS_repo }} and give a summary of the notes. + toolboxes: + - seclab_taskflows.toolboxes.codeql_python diff --git a/src/seclab_taskflows/toolboxes/codeql_python.yaml b/src/seclab_taskflows/toolboxes/codeql_python.yaml new file mode 100644 index 0000000..b2525c8 --- /dev/null +++ b/src/seclab_taskflows/toolboxes/codeql_python.yaml @@ -0,0 +1,48 @@ +# SPDX-FileCopyrightText: 2025 GitHub +# SPDX-License-Identifier: MIT + +seclab-taskflow-agent: + version: 1 + filetype: toolbox + +server_params: + kind: streamable + url: 'http://localhost:9998/mcp' + # if you set a command/args/env we will also start the server on demand + command: python + args: ["-m", "seclab_taskflows.mcp_servers.codeql_python.mcp_server"] + env: + CODEQL_DBS_BASE_PATH: "{{ env CODEQL_DBS_BASE_PATH }}" + # prevent git repo operations on gh codeql executions + GH_NO_UPDATE_NOTIFIER: "Disable" + GH_NO_EXTENSION_UPDATE_NOTIFIER: "Disable" + CODEQL_CLI: "{{ env CODEQL_CLI }}" + DATA_DIR: "{{ env DATA_DIR }}" +server_prompt: | + ## CodeQL Supported Programming Languages + + CodeQL supports the following languages, which you'll refer to by their + CodeQL acronyms and which are detailed below: + + - actions: GitHub Actions workflows + - cpp: The C and C++ programming language + - csharp: The C# programming 
language + - go: The Go programming language + - java: The Java programming language (including Kotlin) + - javascript: The JavaScript programming language (including TypeScript) + - python: The Python programming language + - ruby: The Ruby programming language + - rust: The Rust programming language + - swift: The Swift programming language + + When interacting with CodeQL databases, you will need to provide the + appropriate language acronym for the type of project contained within the + CodeQL database. + + For example, when interacting with a CodeQL database for a C-based project + you would reference its language as `cpp` for any CodeQL database + interactions. + + If you are unable to determine the appropriate programming language acronym, + halt your task and ask the user to clarify which programming language the + CodeQL database in question was created for.