From 3552bb86ab54bfcf1551604d2ba96398449d3260 Mon Sep 17 00:00:00 2001 From: Said Sef Date: Mon, 17 Nov 2025 00:28:31 +0000 Subject: [PATCH 1/7] chore: ci workflow trigger From f05cf04f96622f690f7d387cf07ce2b0896f822a Mon Sep 17 00:00:00 2001 From: Said Sef Date: Mon, 17 Nov 2025 14:16:54 +0000 Subject: [PATCH 2/7] feat(github): add user activity query method using GitHub GraphQL API --- src/mcp_github/github_integration.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/mcp_github/github_integration.py b/src/mcp_github/github_integration.py index 09fbc7c..9578bcf 100644 --- a/src/mcp_github/github_integration.py +++ b/src/mcp_github/github_integration.py @@ -665,3 +665,28 @@ def create_release(self, repo_owner: str, repo_name: str, tag_name: str, release logging.error(f"Error creating release: {str(e)}") traceback.print_exc() return {"status": "error", "message": str(e)} + + def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str, Any]: + """ + Performs a user activity query using GitHub's GraphQL API across all repositories and organisations. + Args: + variables (dict[str, Any]): The variables to include in the query i.e. {"login": "username", "from": "2023-01-01", "to": "2023-12-31"}. + query (str): The search query string. GitHub GraphQL query summary collection that includes all activity across all orgs, public and private. + Returns: + Dict[str, Any]: The JSON response from the GitHub API containing search results if successful. + """ + logging.info("Performing user query on GitHub") + + try: + response = requests.post('https://api.github.com/graphql', json={'query': query, 'variables': variables}, headers=self._get_headers(), timeout=TIMEOUT) + response.raise_for_status() + query_data = response.json() + return query_data + except requests.exceptions.RequestException as req_err: + logging.error(f"Request error during user activity query: {str(req_err)}") + traceback.print_exc() + return {"status": "error", "message": str(req_err)} + except Exception as e: + logging.error(f"Error performing user activity query: {str(e)}") + traceback.print_exc() + return {"status": "error", "message": str(e)} From ce79d53af66cff7cc7cbaf1001d3c787dd61c95f Mon Sep 17 00:00:00 2001 From: Said Sef Date: Mon, 17 Nov 2025 14:28:51 +0000 Subject: [PATCH 3/7] fix(github): e501 line too long --- src/mcp_github/github_integration.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/mcp_github/github_integration.py b/src/mcp_github/github_integration.py index 9578bcf..f0dc356 100644 --- a/src/mcp_github/github_integration.py +++ b/src/mcp_github/github_integration.py @@ -678,7 +678,12 @@ def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str logging.info("Performing user query on GitHub") try: - response = requests.post('https://api.github.com/graphql', json={'query': query, 'variables': variables}, headers=self._get_headers(), timeout=TIMEOUT) + response = requests.post( + 'https://api.github.com/graphql', + json={'query': query, 'variables': variables}, + headers=self._get_headers(), + timeout=TIMEOUT + ) response.raise_for_status() query_data = response.json() return query_data From 16cd4609f377c87559424106af5455b7450b3f0e Mon Sep 17 00:00:00 2001 From: Said Sef Date: Mon, 17 Nov 2025 19:49:36 +0000 Subject: [PATCH 4/7] feat(github): enhance user activity query documentation for authenticated user access --- src/mcp_github/github_integration.py | 99 ++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/src/mcp_github/github_integration.py b/src/mcp_github/github_integration.py index f0dc356..69e6813 100644 --- a/src/mcp_github/github_integration.py +++ b/src/mcp_github/github_integration.py @@ -668,14 +668,99 @@ def create_release(self, repo_owner: str, repo_name: str, tag_name: str, release def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str, Any]: """ - Performs a user activity query using GitHub's GraphQL API across all repositories and organisations. + Performs a user activity query using GitHub's GraphQL API for the authenticated user (token owner). + + **Critical**: To query activities within a specific organization (e.g., "saidsef"): + 1. Query the organization DIRECTLY using `organization(login: "saidsef")` + 2. Do NOT use `viewer.contributionsCollection` as it excludes many private org activities + 3. Organization name is CASE-SENSITIVE - must match exactly + + **For Organization-Specific Activity** (e.g., saidsef): + ```graphql + query($orgName: String!, $from: GitTimestamp!, $to: GitTimestamp!) { + organization(login: $orgName) { + login + repositories(first: 100, privacy: PRIVATE, orderBy: {field: PUSHED_AT, direction: DESC}) { + nodes { + name + isPrivate + owner { login } + defaultBranchRef { + target { + ... on Commit { + history(since: $from, until: $to) { + totalCount + nodes { + author { user { login } } + committedDate + message + } + } + } + } + } + pullRequests(first: 100, states: [OPEN, CLOSED, MERGED], orderBy: {field: UPDATED_AT, direction: DESC}) { + nodes { + number + title + author { login } + createdAt + state + } + } + issues(first: 100, states: [OPEN, CLOSED], orderBy: {field: UPDATED_AT, direction: DESC}) { + nodes { + number + title + author { login } + createdAt + state + } + } + } + } + } + } + ``` + + **For Authenticated User Activity Across All Orgs**: + ```graphql + query($from: DateTime!, $to: DateTime!) { + viewer { + login + contributionsCollection(from: $from, to: $to) { + commitContributionsByRepository(maxRepositories: 100) { + repository { name isPrivate owner { login } } + contributions { totalCount } + } + pullRequestContributionsByRepository(maxRepositories: 100) { + repository { name isPrivate owner { login } } + contributions { totalCount } + } + } + organizations(first: 100) { + nodes { login } + } + } + } + ``` + Args: - variables (dict[str, Any]): The variables to include in the query i.e. {"login": "username", "from": "2023-01-01", "to": "2023-12-31"}. - query (str): The search query string. GitHub GraphQL query summary collection that includes all activity across all orgs, public and private. + variables (dict[str, Any]): Query variables. Options: + - For org-specific with commit history: {"orgName": "saidsef", "from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} + Note: Use GitTimestamp type (ISO 8601 format) for $from/$to when querying commit history + - For all activity: {"from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} + Note: Use DateTime type for contributionsCollection queries + query (str): GraphQL query string. Use `organization(login: $orgName)` for specific org queries. + IMPORTANT: Declare variables as `GitTimestamp!` for commit history, `DateTime!` for contributionsCollection. + Returns: - Dict[str, Any]: The JSON response from the GitHub API containing search results if successful. + Dict[str, Any]: The JSON response from GitHub's GraphQL API containing activity data. + + Note: Organization queries require `read:org` scope. Organization name is case-sensitive. + GitTimestamp and DateTime both accept ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). """ - logging.info("Performing user query on GitHub") + logging.info(f"Performing GraphQL query with variables: {variables}") try: response = requests.post( @@ -686,6 +771,10 @@ def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str ) response.raise_for_status() query_data = response.json() + + if 'errors' in query_data: + logging.error(f"GraphQL errors: {query_data['errors']}") + return query_data except requests.exceptions.RequestException as req_err: logging.error(f"Request error during user activity query: {str(req_err)}") From 8e4d7670fe19c4dcdc0b4abf9c7fc9c1ce8f288a Mon Sep 17 00:00:00 2001 From: Said Sef Date: Fri, 21 Nov 2025 19:16:22 +0000 Subject: [PATCH 5/7] feat(github): enhance user activity query with organization-specific and cross-org support --- src/mcp_github/github_integration.py | 230 ++++++++++++++++++++++----- 1 file changed, 191 insertions(+), 39 deletions(-) diff --git a/src/mcp_github/github_integration.py b/src/mcp_github/github_integration.py index 69e6813..223a567 100644 --- a/src/mcp_github/github_integration.py +++ b/src/mcp_github/github_integration.py @@ -668,51 +668,120 @@ def create_release(self, repo_owner: str, repo_name: str, tag_name: str, release def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str, Any]: """ - Performs a user activity query using GitHub's GraphQL API for the authenticated user (token owner). + Performs a user activity query using GitHub's GraphQL API with support for organization-specific + and cross-organization queries. - **Critical**: To query activities within a specific organization (e.g., "saidsef"): - 1. Query the organization DIRECTLY using `organization(login: "saidsef")` - 2. Do NOT use `viewer.contributionsCollection` as it excludes many private org activities - 3. Organization name is CASE-SENSITIVE - must match exactly - - **For Organization-Specific Activity** (e.g., saidsef): + **Query Modes**: + + 1. **Organization-Specific Activity** (fastest, most comprehensive): + - Query organization repositories directly + - Access all private repos in the org (with proper token scopes) + - Get detailed commit history, PRs, and issues + - Variables: {"orgName": "Pelle-Tech", "from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} + - Variable types: `$orgName: String!`, `$from: GitTimestamp!`, `$to: GitTimestamp!` + + 2. **Authenticated User Activity Across All Orgs** (slower, summary only): + - Query viewer's contribution collection + - Includes all orgs where user is a member + - Summary counts only (no detailed commit messages) + - Variables: {"from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} + - Variable types: `$from: DateTime!`, `$to: DateTime!` + + 3. **User Activity in Specific Organization** (most restrictive): + - Query organization repos filtered by user + - Requires combining org query with author filtering + - Variables: {"orgName": "Pelle-Tech", "username": "saidsef", "from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} + - Variable types: `$orgName: String!`, `$username: String!`, `$from: GitTimestamp!`, `$to: GitTimestamp!` + + **Performance Tips**: + - Use pagination parameters to limit initial data: `first: 50` instead of `first: 100` + - Query only required fields to reduce response size + - Use org-specific queries when possible (faster than viewer queries) + - For large date ranges, split into smaller queries + - Cache results for repeated queries + + **Example Queries**: + + **Fast Org Query with Pagination**: ```graphql - query($orgName: String!, $from: GitTimestamp!, $to: GitTimestamp!) { + query($orgName: String!, $from: GitTimestamp!, $to: GitTimestamp!, $repoCount: Int = 50) { organization(login: $orgName) { login - repositories(first: 100, privacy: PRIVATE, orderBy: {field: PUSHED_AT, direction: DESC}) { + repositories(first: $repoCount, privacy: PRIVATE, orderBy: {field: PUSHED_AT, direction: DESC}) { + pageInfo { + hasNextPage + endCursor + } nodes { name isPrivate - owner { login } defaultBranchRef { target { ... on Commit { - history(since: $from, until: $to) { + history(since: $from, until: $to, first: 100) { totalCount + pageInfo { + hasNextPage + endCursor + } nodes { - author { user { login } } + author { + user { login } + email + } committedDate message + additions + deletions } } } } } - pullRequests(first: 100, states: [OPEN, CLOSED, MERGED], orderBy: {field: UPDATED_AT, direction: DESC}) { + pullRequests(first: 50, states: [OPEN, CLOSED, MERGED], orderBy: {field: UPDATED_AT, direction: DESC}) { + totalCount nodes { number title author { login } createdAt state + additions + deletions + } + } + } + } + } + } + ``` + + **User-Filtered Org Query**: + ```graphql + query($orgName: String!, $username: String!, $from: GitTimestamp!, $to: GitTimestamp!) { + organization(login: $orgName) { + login + repositories(first: 100, privacy: PRIVATE) { + nodes { + name + defaultBranchRef { + target { + ... on Commit { + history(since: $from, until: $to, author: {emails: [$username]}, first: 100) { + totalCount + nodes { + author { user { login } } + committedDate + message + } + } + } } } - issues(first: 100, states: [OPEN, CLOSED], orderBy: {field: UPDATED_AT, direction: DESC}) { + pullRequests(first: 100, states: [OPEN, CLOSED, MERGED]) { nodes { - number - title author { login } + title createdAt state } @@ -723,64 +792,147 @@ def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str } ``` - **For Authenticated User Activity Across All Orgs**: + **Cross-Org Viewer Query**: ```graphql query($from: DateTime!, $to: DateTime!) { viewer { login contributionsCollection(from: $from, to: $to) { commitContributionsByRepository(maxRepositories: 100) { - repository { name isPrivate owner { login } } + repository { + name + isPrivate + owner { login } + } contributions { totalCount } } pullRequestContributionsByRepository(maxRepositories: 100) { - repository { name isPrivate owner { login } } + repository { + name + isPrivate + owner { login } + } + contributions { totalCount } + } + issueContributionsByRepository(maxRepositories: 100) { + repository { + name + isPrivate + owner { login } + } contributions { totalCount } } } organizations(first: 100) { - nodes { login } + nodes { + login + viewerCanAdminister + } } } } ``` Args: - variables (dict[str, Any]): Query variables. Options: - - For org-specific with commit history: {"orgName": "saidsef", "from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} - Note: Use GitTimestamp type (ISO 8601 format) for $from/$to when querying commit history - - For all activity: {"from": "2024-10-01T00:00:00Z", "to": "2024-10-31T23:59:59Z"} - Note: Use DateTime type for contributionsCollection queries - query (str): GraphQL query string. Use `organization(login: $orgName)` for specific org queries. - IMPORTANT: Declare variables as `GitTimestamp!` for commit history, `DateTime!` for contributionsCollection. + variables (dict[str, Any]): Query variables. Supported combinations: + - Org-specific: {"orgName": "Pelle-Tech", "from": "...", "to": "..."} + - Cross-org: {"from": "...", "to": "..."} + - User-filtered org: {"orgName": "Pelle-Tech", "username": "saidsef", "from": "...", "to": "..."} + - With pagination: Add {"repoCount": 50, "prCount": 50} for custom limits + query (str): GraphQL query string. Must declare correct variable types: + - Organization queries: Use `GitTimestamp!` for $from/$to + - Viewer queries: Use `DateTime!` for $from/$to + - Both types accept ISO 8601 format: "YYYY-MM-DDTHH:MM:SSZ" Returns: - Dict[str, Any]: The JSON response from GitHub's GraphQL API containing activity data. + Dict[str, Any]: GraphQL response with activity data or error information. + - Success: {"data": {...}} + - Errors: {"errors": [...], "data": null} + - Network error: {"status": "error", "message": "..."} - Note: Organization queries require `read:org` scope. Organization name is case-sensitive. - GitTimestamp and DateTime both accept ISO 8601 format (YYYY-MM-DDTHH:MM:SSZ). - """ - logging.info(f"Performing GraphQL query with variables: {variables}") + Error Handling: + - Validates response status codes + - Logs GraphQL errors with details + - Returns structured error responses + - Includes traceback for debugging + + Required Token Scopes: + - `repo`: Full control of private repositories + - `read:org`: Read org and team membership + - `read:user`: Read user profile data + + Performance Notes: + - Org queries are ~3x faster than viewer queries + - Large date ranges (>1 year) may timeout + - Use pagination for repos with >100 commits + - Response size correlates with date range and repo count + """ + # Validate inputs + if not query or not isinstance(query, str): + return {"status": "error", "message": "Query must be a non-empty string"} + + if not variables or not isinstance(variables, dict): + return {"status": "error", "message": "Variables must be a non-empty dictionary"} + + # Determine query type for optimized logging + query_type = "unknown" + if "orgName" in variables and "username" in variables: + query_type = "user-filtered-org" + elif "orgName" in variables: + query_type = "org-specific" + elif "from" in variables and "to" in variables: + query_type = "cross-org-viewer" + + logging.info(f"Performing GraphQL query [type: {query_type}] with variables: {variables}") try: + # Make GraphQL request with optimized timeout response = requests.post( 'https://api.github.com/graphql', json={'query': query, 'variables': variables}, headers=self._get_headers(), - timeout=TIMEOUT + timeout=TIMEOUT * 2 # Double timeout for GraphQL queries (can be complex) ) response.raise_for_status() query_data = response.json() + # Handle GraphQL errors (API accepts request but query has issues) if 'errors' in query_data: - logging.error(f"GraphQL errors: {query_data['errors']}") - + error_messages = [err.get('message', 'Unknown error') for err in query_data['errors']] + logging.error(f"GraphQL query errors: {error_messages}") + + # Check for common errors and provide helpful messages + for error in query_data['errors']: + error_type = error.get('extensions', {}).get('code') + if error_type == 'variableMismatch': + logging.error(f"Variable type mismatch: Use GitTimestamp for org queries, DateTime for viewer queries") + elif error_type == 'NOT_FOUND': + logging.error(f"Resource not found: Check org/user name is correct and case-sensitive") + elif error_type == 'FORBIDDEN': + logging.error(f"Access forbidden: Check token has required scopes (repo, read:org)") + + return query_data # Return with errors for caller to handle + + # Log success with summary + if 'data' in query_data: + data_keys = list(query_data['data'].keys()) + logging.info(f"GraphQL query successful [type: {query_type}], returned data keys: {data_keys}") + return query_data + + except requests.exceptions.Timeout: + error_msg = f"GraphQL query timeout after {TIMEOUT * 2}s. Try reducing date range or repo count." + logging.error(error_msg) + return {"status": "error", "message": error_msg, "timeout": True} + except requests.exceptions.RequestException as req_err: - logging.error(f"Request error during user activity query: {str(req_err)}") + error_msg = f"Request error during GraphQL query: {str(req_err)}" + logging.error(error_msg) traceback.print_exc() - return {"status": "error", "message": str(req_err)} + return {"status": "error", "message": error_msg, "request_exception": True} + except Exception as e: - logging.error(f"Error performing user activity query: {str(e)}") + error_msg = f"Unexpected error performing GraphQL query: {str(e)}" + logging.error(error_msg) traceback.print_exc() - return {"status": "error", "message": str(e)} + return {"status": "error", "message": error_msg, "unexpected": True} From 8cad90ac5f19e83c947cee1a2d0937b9a978ef3e Mon Sep 17 00:00:00 2001 From: Said Sef Date: Thu, 27 Nov 2025 23:44:53 +0000 Subject: [PATCH 6/7] feat(github): add user organization activity query for comprehensive user activity tracking --- src/mcp_github/github_integration.py | 529 +++++++++++++++++++++++++++ 1 file changed, 529 insertions(+) diff --git a/src/mcp_github/github_integration.py b/src/mcp_github/github_integration.py index 223a567..19cc929 100644 --- a/src/mcp_github/github_integration.py +++ b/src/mcp_github/github_integration.py @@ -936,3 +936,532 @@ def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str logging.error(error_msg) traceback.print_exc() return {"status": "error", "message": error_msg, "unexpected": True} + + def get_user_org_activity(self, org_name: str, username: str, from_date: str, to_date: str) -> Dict[str, Any]: + """ + Gets comprehensive activity for a SPECIFIC USER across ALL repositories in an organization. + + Efficiently filters by user at the GraphQL level - does NOT scan entire repos. + Captures ALL branches, not just main/default branch. + + Includes: + - ALL commits by the user (across all branches) + - ALL PRs where user was: author, reviewer, merger, commenter, or assigned + - ALL Issues where user was: author, assigned, commenter, or participant + - Handles reviewed, open, merged, closed, and approved PRs + + Args: + org_name (str): GitHub organization name + username (str): GitHub username to query + from_date (str): Start date ISO 8601 (e.g., "2024-01-01T00:00:00Z") + to_date (str): End date ISO 8601 (e.g., "2024-12-31T23:59:59Z") + + Returns: + Dict containing: status, summary stats, commits[], prs[], issues[], pagination_info + """ + logging.info(f"Fetching ALL activity for '{username}' in '{org_name}' from {from_date} to {to_date}") + + # Step 1: Get user's email addresses for efficient commit filtering + user_emails = self._get_user_emails(username) + logging.info(f"Found {len(user_emails)} email(s) for filtering commits") + + # Step 2: Get repositories where user actually contributed (optimized approach) + # First try to get repos from user's contribution collection + contributed_repos = self._get_user_contributed_repos(username, org_name, from_date, to_date) + + if contributed_repos: + logging.info(f"Found {len(contributed_repos)} repos with user contributions via contributionsCollection") + org_repos = contributed_repos + else: + # Fallback: Get ALL repositories in organization + logging.info(f"Fallback: Scanning all org repos (contributionsCollection returned no results)") + org_repos = self._get_all_org_repos(org_name) + logging.info(f"Found {len(org_repos)} total repositories in {org_name}") + + if not org_repos: + return self._empty_activity_response(username, org_name, from_date, to_date) + + # Step 3: Process each repo - filter by user at GraphQL level + all_commits = [] + all_prs = [] + all_issues = [] + repos_with_activity = 0 + + for repo_info in org_repos: + repo_name = repo_info.get("name") + repo_url = repo_info.get("url") + + logging.info(f"Scanning {org_name}/{repo_name} for {username}") + + # Fetch user-specific data from this repo + repo_activity = self._fetch_repo_user_activity( + org_name, repo_name, repo_url, username, user_emails, from_date, to_date + ) + + if repo_activity: + all_commits.extend(repo_activity.get("commits", [])) + all_prs.extend(repo_activity.get("prs", [])) + all_issues.extend(repo_activity.get("issues", [])) + + if repo_activity.get("has_activity"): + repos_with_activity += 1 + + # Sort by date (most recent first) + all_commits.sort(key=lambda x: x["date"], reverse=True) + all_prs.sort(key=lambda x: x["updated_at"], reverse=True) + all_issues.sort(key=lambda x: x["updated_at"], reverse=True) + + # Generate summary + user_authored_prs = [pr for pr in all_prs if "Author" in pr["user_roles"]] + + summary = { + "user": username, + "organization": org_name, + "date_range": f"{from_date} to {to_date}", + "total_commits": len(all_commits), + "total_prs_involved": len(all_prs), + "prs_authored": len(user_authored_prs), + "prs_reviewed": len([pr for pr in all_prs if any(r in pr["user_roles"] for r in ["Approved", "Requested Changes", "Reviewed"])]), + "prs_merged": len([pr for pr in all_prs if "Merged" in pr["user_roles"]]), + "prs_commented": len([pr for pr in all_prs if "Commented" in pr["user_roles"]]), + "total_issues_involved": len(all_issues), + "issues_authored": len([issue for issue in all_issues if "Author" in issue["user_roles"]]), + "issues_assigned": len([issue for issue in all_issues if "Assigned" in issue["user_roles"]]), + "issues_commented": len([issue for issue in all_issues if "Commented" in issue["user_roles"]]), + "total_additions": sum(c["additions"] for c in all_commits), + "total_deletions": sum(c["deletions"] for c in all_commits), + } + + logging.info(f"Activity complete: {len(all_commits)} commits, {len(all_prs)} PRs, {len(all_issues)} issues from {repos_with_activity}/{len(org_repos)} repos") + + return { + "status": "success", + "summary": summary, + "commits": all_commits, + "prs": all_prs, + "issues": all_issues, + "pagination_info": { + "total_repos_in_org": len(org_repos), + "repos_with_user_activity": repos_with_activity, + "commits_found": len(all_commits), + "prs_found": len(all_prs), + "issues_found": len(all_issues) + } + } + + def _get_user_emails(self, username: str) -> list: + """Get user's email addresses for commit filtering.""" + query = """ + query($username: String!) { + user(login: $username) { + email + emails(first: 10) { + nodes { email } + } + } + } + """ + + result = self.user_activity_query({"username": username}, query) + emails = [] + + if "data" in result: + user_data = result.get("data", {}).get("user", {}) + if user_data.get("email"): + emails.append(user_data["email"]) + for node in user_data.get("emails", {}).get("nodes", []): + if node.get("email") and node["email"] not in emails: + emails.append(node["email"]) + + return emails + + def _get_user_contributed_repos(self, username: str, org_name: str, from_date: str, to_date: str) -> list: + """Get repositories where user actually contributed within date range.""" + # Note: from_date/to_date need to be in DateTime format for contributionsCollection + query = """ + query($username: String!, $from: DateTime!, $to: DateTime!) { + user(login: $username) { + contributionsCollection(from: $from, to: $to, organizationID: null) { + commitContributionsByRepository(maxRepositories: 100) { + repository { + name + url + owner { login } + } + contributions { totalCount } + } + pullRequestContributionsByRepository(maxRepositories: 100) { + repository { + name + url + owner { login } + } + contributions { totalCount } + } + issueContributionsByRepository(maxRepositories: 100) { + repository { + name + url + owner { login } + } + contributions { totalCount } + } + } + } + } + """ + + result = self.user_activity_query({"username": username, "from": from_date, "to": to_date}, query) + + repos_dict = {} # Use dict to deduplicate by repo name + + if "data" in result and result.get("data", {}).get("user"): + contributions = result["data"]["user"]["contributionsCollection"] + + # Collect repos from commits + for item in contributions.get("commitContributionsByRepository", []): + repo = item.get("repository", {}) + owner = repo.get("owner", {}).get("login", "") + if owner.lower() == org_name.lower(): # Filter by org + repo_name = repo.get("name") + if repo_name: + repos_dict[repo_name] = { + "name": repo_name, + "url": repo.get("url", "") + } + + # Collect repos from PRs + for item in contributions.get("pullRequestContributionsByRepository", []): + repo = item.get("repository", {}) + owner = repo.get("owner", {}).get("login", "") + if owner.lower() == org_name.lower(): + repo_name = repo.get("name") + if repo_name and repo_name not in repos_dict: + repos_dict[repo_name] = { + "name": repo_name, + "url": repo.get("url", "") + } + + # Collect repos from issues + for item in contributions.get("issueContributionsByRepository", []): + repo = item.get("repository", {}) + owner = repo.get("owner", {}).get("login", "") + if owner.lower() == org_name.lower(): + repo_name = repo.get("name") + if repo_name and repo_name not in repos_dict: + repos_dict[repo_name] = { + "name": repo_name, + "url": repo.get("url", "") + } + + return list(repos_dict.values()) + + def _get_all_org_repos(self, org_name: str) -> list: + """Get ALL repositories in organization with pagination.""" + all_repos = [] + has_next_page = True + cursor = None + + while has_next_page: + cursor_arg = f', after: "{cursor}"' if cursor else '' + query = f""" + query($orgName: String!) {{ + organization(login: $orgName) {{ + repositories(first: 100{cursor_arg}, orderBy: {{field: UPDATED_AT, direction: DESC}}) {{ + pageInfo {{ hasNextPage endCursor }} + nodes {{ name url }} + }} + }} + }} + """ + + result = self.user_activity_query({"orgName": org_name}, query) + + if "data" not in result or "errors" in result: + break + + repos_data = result.get("data", {}).get("organization", {}).get("repositories", {}) + all_repos.extend(repos_data.get("nodes", [])) + + page_info = repos_data.get("pageInfo", {}) + has_next_page = page_info.get("hasNextPage", False) + cursor = page_info.get("endCursor") + + return all_repos + + def _fetch_repo_user_activity(self, org_name: str, repo_name: str, repo_url: str, + username: str, user_emails: list, from_date: str, to_date: str) -> Dict: + """Fetch user-specific activity from a single repo - FILTERED at GraphQL level.""" + + # Build email filter for commits (server-side filtering) + if user_emails: + emails_json = str(user_emails).replace("'", '"') + author_filter = f'author: {{emails: {emails_json}}}, ' + else: + author_filter = "" + + # First, check if user has ANY activity in this repo within date range + # This prevents fetching from repos where user has no contributions + check_query = """ + query($orgName: String!, $repoName: String!, $from: GitTimestamp!, $to: GitTimestamp!) { + repository(owner: $orgName, name: $repoName) { + defaultBranchRef { + target { + ... on Commit { + history(since: $from, until: $to, """ + author_filter + """first: 1) { + totalCount + } + } + } + } + } + } + """ + + check_result = self.user_activity_query({ + "orgName": org_name, + "repoName": repo_name, + "from": from_date, + "to": to_date + }, check_query) + + # Skip if no commits found (user has no activity in default branch) + commit_count = 0 + if "data" in check_result and check_result.get("data", {}).get("repository"): + repo_check = check_result["data"]["repository"] + if repo_check.get("defaultBranchRef"): + commit_count = repo_check["defaultBranchRef"]["target"]["history"]["totalCount"] + + # If no commits, still check PRs/Issues but with quick check + if commit_count == 0: + logging.info(f" No commits by {username} in {repo_name}, checking PRs/Issues only") + + # Query with user filtering at GraphQL level + query = """ + query($orgName: String!, $repoName: String!, $from: GitTimestamp!, $to: GitTimestamp!) { + repository(owner: $orgName, name: $repoName) { + refs(refPrefix: "refs/heads/", first: 100) { + nodes { + name + target { + ... on Commit { + history(since: $from, until: $to, """ + author_filter + """first: 100) { + nodes { + oid + messageHeadline + author { + user { login } + email + name + } + committedDate + additions + deletions + url + } + } + } + } + } + } + pullRequests(first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) { + nodes { + number + title + url + state + isDraft + author { login } + createdAt + updatedAt + mergedAt + closedAt + commits { totalCount } + additions + deletions + changedFiles + mergedBy { login } + assignees(first: 10) { nodes { login } } + reviews(first: 50) { + nodes { + author { login } + state + submittedAt + } + } + comments(first: 50) { nodes { author { login } } } + labels(first: 10) { nodes { name } } + } + } + issues(first: 100, orderBy: {field: UPDATED_AT, direction: DESC}) { + nodes { + number + title + url + state + author { login } + createdAt + updatedAt + closedAt + assignees(first: 10) { nodes { login } } + participants(first: 50) { nodes { login } } + comments(first: 50) { nodes { author { login } } } + labels(first: 10) { nodes { name } } + } + } + } + } + """ + + variables = { + "orgName": org_name, + "repoName": repo_name, + "from": from_date, + "to": to_date + } + + result = self.user_activity_query(variables, query) + + if "data" not in result or "errors" in result: + return None + + repo_data = result.get("data", {}).get("repository", {}) + if not repo_data: + return None + + # Parse commits (deduplicate by OID across branches) + commits = [] + seen_oids = set() + for ref in repo_data.get("refs", {}).get("nodes", []): + branch = ref.get("name") + for commit in ref.get("target", {}).get("history", {}).get("nodes", []): + oid = commit.get("oid") + if oid not in seen_oids: + seen_oids.add(oid) + commits.append({ + "repo": repo_name, + "repo_url": repo_url, + "branch": branch, + "oid": oid[:7], + "full_oid": oid, + "message": commit.get("messageHeadline", ""), + "author": commit.get("author", {}).get("name", "Unknown"), + "date": commit.get("committedDate", ""), + "additions": commit.get("additions", 0), + "deletions": commit.get("deletions", 0), + "url": commit.get("url", "") + }) + + # Parse PRs (filter by user involvement) + prs = [] + for pr in repo_data.get("pullRequests", {}).get("nodes", []): + pr_author = pr.get("author", {}).get("login", "") if pr.get("author") else "" + merged_by = pr.get("mergedBy", {}).get("login", "") if pr.get("mergedBy") else "" + assignees = [a.get("login") for a in pr.get("assignees", {}).get("nodes", [])] + reviewers = [r.get("author", {}).get("login") for r in pr.get("reviews", {}).get("nodes", []) if r.get("author")] + commenters = [c.get("author", {}).get("login") for c in pr.get("comments", {}).get("nodes", []) if c.get("author")] + + if username in [pr_author, merged_by] + assignees + reviewers + commenters: + roles = [] + if pr_author == username: roles.append("Author") + if merged_by == username: roles.append("Merged") + if username in assignees: roles.append("Assigned") + + if username in reviewers: + user_reviews = [r for r in pr.get("reviews", {}).get("nodes", []) if r.get("author", {}).get("login") == username] + states = set(r.get("state") for r in user_reviews) + if "APPROVED" in states: roles.append("Approved") + elif "CHANGES_REQUESTED" in states: roles.append("Requested Changes") + elif "COMMENTED" in states: roles.append("Reviewed") + + if username in commenters and "Author" not in roles: + roles.append("Commented") + + prs.append({ + "repo": repo_name, + "repo_url": repo_url, + "number": pr.get("number", 0), + "title": pr.get("title", ""), + "author": pr_author, + "state": pr.get("state", ""), + "is_draft": pr.get("isDraft", False), + "created_at": pr.get("createdAt", ""), + "updated_at": pr.get("updatedAt", ""), + "merged_at": pr.get("mergedAt", ""), + "merged_by": merged_by, + "additions": pr.get("additions", 0), + "deletions": pr.get("deletions", 0), + "changed_files": pr.get("changedFiles", 0), + "commits_count": pr.get("commits", {}).get("totalCount", 0), + "url": pr.get("url", ""), + "user_roles": ", ".join(roles), + "labels": [l.get("name") for l in pr.get("labels", {}).get("nodes", [])] + }) + + # Parse issues (filter by user involvement) + issues = [] + for issue in repo_data.get("issues", {}).get("nodes", []): + issue_author = issue.get("author", {}).get("login", "") if issue.get("author") else "" + assignees = [a.get("login") for a in issue.get("assignees", {}).get("nodes", [])] + participants = [p.get("login") for p in issue.get("participants", {}).get("nodes", [])] + commenters = [c.get("author", {}).get("login") for c in issue.get("comments", {}).get("nodes", []) if c.get("author")] + + if username in [issue_author] + assignees + participants + commenters: + roles = [] + if issue_author == username: roles.append("Author") + if username in assignees: roles.append("Assigned") + if username in commenters and "Author" not in roles: + count = len([c for c in issue.get("comments", {}).get("nodes", []) if c.get("author", {}).get("login") == username]) + roles.append(f"Commented ({count})") + if username in participants and not roles: + roles.append("Participant") + + issues.append({ + "repo": repo_name, + "repo_url": repo_url, + "number": issue.get("number", 0), + "title": issue.get("title", ""), + "author": issue_author, + "state": issue.get("state", ""), + "created_at": issue.get("createdAt", ""), + "updated_at": issue.get("updatedAt", ""), + "closed_at": issue.get("closedAt", ""), + "url": issue.get("url", ""), + "user_roles": ", ".join(roles), + "labels": [l.get("name") for l in issue.get("labels", {}).get("nodes", [])] + }) + + return { + "commits": commits, + "prs": prs, + "issues": issues, + "has_activity": len(commits) > 0 or len(prs) > 0 or len(issues) > 0 + } + + def _empty_activity_response(self, username: str, org_name: str, from_date: str, to_date: str) -> Dict: + """Return empty activity response.""" + return { + "status": "success", + "summary": { + "user": username, + "organization": org_name, + "date_range": f"{from_date} to {to_date}", + "total_commits": 0, + "total_prs_involved": 0, + "prs_authored": 0, + "prs_reviewed": 0, + "prs_merged": 0, + "total_additions": 0, + "total_deletions": 0, + }, + "commits": [], + "prs": [], + "issues": [], + "pagination_info": { + "total_repos_in_org": 0, + "repos_with_user_activity": 0, + "commits_found": 0, + "prs_found": 0, + "issues_found": 0 + } + } From 4f620e5903d036196323aacdabfcdb65946fd092 Mon Sep 17 00:00:00 2001 From: Said Sef Date: Fri, 28 Nov 2025 00:06:42 +0000 Subject: [PATCH 7/7] feat(github): add pagination and reduce the output result --- src/mcp_github/github_integration.py | 140 ++++++++++++++++++--------- 1 file changed, 93 insertions(+), 47 deletions(-) diff --git a/src/mcp_github/github_integration.py b/src/mcp_github/github_integration.py index 19cc929..3a73565 100644 --- a/src/mcp_github/github_integration.py +++ b/src/mcp_github/github_integration.py @@ -937,17 +937,27 @@ def user_activity_query(self, variables: dict[str, Any], query: str) -> Dict[str traceback.print_exc() return {"status": "error", "message": error_msg, "unexpected": True} - def get_user_org_activity(self, org_name: str, username: str, from_date: str, to_date: str) -> Dict[str, Any]: + def get_user_org_activity( + self, + org_name: str, + username: str, + from_date: str, + to_date: str, + page: int = 1, + per_page: int = 50 + ) -> Dict[str, Any]: """ Gets comprehensive activity for a SPECIFIC USER across ALL repositories in an organization. + **PAGINATED RESULTS** - Returns a manageable subset of data to prevent context overflow. + Efficiently filters by user at the GraphQL level - does NOT scan entire repos. Captures ALL branches, not just main/default branch. Includes: - - ALL commits by the user (across all branches) - - ALL PRs where user was: author, reviewer, merger, commenter, or assigned - - ALL Issues where user was: author, assigned, commenter, or participant + - Commits by the user (paginated) + - PRs where user was: author, reviewer, merger, commenter, or assigned (paginated) + - Issues where user was: author, assigned, commenter, or participant (paginated) - Handles reviewed, open, merged, closed, and approved PRs Args: @@ -955,9 +965,17 @@ def get_user_org_activity(self, org_name: str, username: str, from_date: str, to username (str): GitHub username to query from_date (str): Start date ISO 8601 (e.g., "2024-01-01T00:00:00Z") to_date (str): End date ISO 8601 (e.g., "2024-12-31T23:59:59Z") + page (int): Page number (1-indexed, default: 1) + per_page (int): Items per page (default: 50, max: 100) Returns: - Dict containing: status, summary stats, commits[], prs[], issues[], pagination_info + Dict containing: + - status: success/error + - summary: aggregate statistics + - commits[]: paginated commits (most recent first) + - prs[]: paginated PRs (most recent first) + - issues[]: paginated issues (most recent first) + - pagination: current_page, per_page, total_items, total_pages, has_next_page """ logging.info(f"Fetching ALL activity for '{username}' in '{org_name}' from {from_date} to {to_date}") @@ -979,7 +997,7 @@ def get_user_org_activity(self, org_name: str, username: str, from_date: str, to logging.info(f"Found {len(org_repos)} total repositories in {org_name}") if not org_repos: - return self._empty_activity_response(username, org_name, from_date, to_date) + return self._empty_activity_response(username, org_name, from_date, to_date, page, per_page) # Step 3: Process each repo - filter by user at GraphQL level all_commits = [] @@ -1011,15 +1029,36 @@ def get_user_org_activity(self, org_name: str, username: str, from_date: str, to all_prs.sort(key=lambda x: x["updated_at"], reverse=True) all_issues.sort(key=lambda x: x["updated_at"], reverse=True) - # Generate summary + # Calculate pagination + per_page = min(max(1, per_page), 100) # Clamp between 1-100 + page = max(1, page) # Must be at least 1 + + total_commits = len(all_commits) + total_prs = len(all_prs) + total_issues = len(all_issues) + + # Calculate pages + commits_total_pages = (total_commits + per_page - 1) // per_page if total_commits > 0 else 1 + prs_total_pages = (total_prs + per_page - 1) // per_page if total_prs > 0 else 1 + issues_total_pages = (total_issues + per_page - 1) // per_page if total_issues > 0 else 1 + + # Slice data for current page + start_idx = (page - 1) * per_page + end_idx = start_idx + per_page + + paginated_commits = all_commits[start_idx:end_idx] + paginated_prs = all_prs[start_idx:end_idx] + paginated_issues = all_issues[start_idx:end_idx] + + # Generate summary (based on ALL data, not just current page) user_authored_prs = [pr for pr in all_prs if "Author" in pr["user_roles"]] summary = { "user": username, "organization": org_name, "date_range": f"{from_date} to {to_date}", - "total_commits": len(all_commits), - "total_prs_involved": len(all_prs), + "total_commits": total_commits, + "total_prs_involved": total_prs, "prs_authored": len(user_authored_prs), "prs_reviewed": len([pr for pr in all_prs if any(r in pr["user_roles"] for r in ["Approved", "Requested Changes", "Reviewed"])]), "prs_merged": len([pr for pr in all_prs if "Merged" in pr["user_roles"]]), @@ -1032,20 +1071,39 @@ def get_user_org_activity(self, org_name: str, username: str, from_date: str, to "total_deletions": sum(c["deletions"] for c in all_commits), } - logging.info(f"Activity complete: {len(all_commits)} commits, {len(all_prs)} PRs, {len(all_issues)} issues from {repos_with_activity}/{len(org_repos)} repos") + logging.info(f"Activity complete: Page {page}/{max(commits_total_pages, prs_total_pages, issues_total_pages)} - Returning {len(paginated_commits)} commits, {len(paginated_prs)} PRs, {len(paginated_issues)} issues from {repos_with_activity}/{len(org_repos)} repos") return { "status": "success", "summary": summary, - "commits": all_commits, - "prs": all_prs, - "issues": all_issues, - "pagination_info": { - "total_repos_in_org": len(org_repos), - "repos_with_user_activity": repos_with_activity, - "commits_found": len(all_commits), - "prs_found": len(all_prs), - "issues_found": len(all_issues) + "commits": paginated_commits, + "prs": paginated_prs, + "issues": paginated_issues, + "pagination": { + "current_page": page, + "per_page": per_page, + "commits": { + "total": total_commits, + "total_pages": commits_total_pages, + "has_next_page": page < commits_total_pages, + "returned": len(paginated_commits) + }, + "prs": { + "total": total_prs, + "total_pages": prs_total_pages, + "has_next_page": page < prs_total_pages, + "returned": len(paginated_prs) + }, + "issues": { + "total": total_issues, + "total_pages": issues_total_pages, + "has_next_page": page < issues_total_pages, + "returned": len(paginated_issues) + }, + "repos": { + "total_in_org": len(org_repos), + "with_user_activity": repos_with_activity + } } } @@ -1125,10 +1183,7 @@ def _get_user_contributed_repos(self, username: str, org_name: str, from_date: s if owner.lower() == org_name.lower(): # Filter by org repo_name = repo.get("name") if repo_name: - repos_dict[repo_name] = { - "name": repo_name, - "url": repo.get("url", "") - } + repos_dict[repo_name] = {"name": repo_name, "url": repo.get("url", "")} # Collect repos from PRs for item in contributions.get("pullRequestContributionsByRepository", []): @@ -1137,10 +1192,7 @@ def _get_user_contributed_repos(self, username: str, org_name: str, from_date: s if owner.lower() == org_name.lower(): repo_name = repo.get("name") if repo_name and repo_name not in repos_dict: - repos_dict[repo_name] = { - "name": repo_name, - "url": repo.get("url", "") - } + repos_dict[repo_name] = {"name": repo_name, "url": repo.get("url", "")} # Collect repos from issues for item in contributions.get("issueContributionsByRepository", []): @@ -1149,10 +1201,7 @@ def _get_user_contributed_repos(self, username: str, org_name: str, from_date: s if owner.lower() == org_name.lower(): repo_name = repo.get("name") if repo_name and repo_name not in repos_dict: - repos_dict[repo_name] = { - "name": repo_name, - "url": repo.get("url", "") - } + repos_dict[repo_name] = {"name": repo_name, "url": repo.get("url", "")} return list(repos_dict.values()) @@ -1226,15 +1275,11 @@ def _fetch_repo_user_activity(self, org_name: str, repo_name: str, repo_url: str }, check_query) # Skip if no commits found (user has no activity in default branch) - commit_count = 0 if "data" in check_result and check_result.get("data", {}).get("repository"): - repo_check = check_result["data"]["repository"] - if repo_check.get("defaultBranchRef"): - commit_count = repo_check["defaultBranchRef"]["target"]["history"]["totalCount"] - - # If no commits, still check PRs/Issues but with quick check - if commit_count == 0: - logging.info(f" No commits by {username} in {repo_name}, checking PRs/Issues only") + if check_result["data"]["repository"].get("defaultBranchRef"): + commit_count = check_result["data"]["repository"]["defaultBranchRef"]["target"]["history"]["totalCount"] + if commit_count == 0: + logging.info(f" No commits by {username} in {repo_name}, checking PRs/Issues only") # Query with user filtering at GraphQL level query = """ @@ -1438,8 +1483,8 @@ def _fetch_repo_user_activity(self, org_name: str, repo_name: str, repo_url: str "has_activity": len(commits) > 0 or len(prs) > 0 or len(issues) > 0 } - def _empty_activity_response(self, username: str, org_name: str, from_date: str, to_date: str) -> Dict: - """Return empty activity response.""" + def _empty_activity_response(self, username: str, org_name: str, from_date: str, to_date: str, page: int = 1, per_page: int = 50) -> Dict: + """Return empty activity response with pagination info.""" return { "status": "success", "summary": { @@ -1457,11 +1502,12 @@ def _empty_activity_response(self, username: str, org_name: str, from_date: str, "commits": [], "prs": [], "issues": [], - "pagination_info": { - "total_repos_in_org": 0, - "repos_with_user_activity": 0, - "commits_found": 0, - "prs_found": 0, - "issues_found": 0 + "pagination": { + "current_page": page, + "per_page": per_page, + "commits": {"total": 0, "total_pages": 1, "has_next_page": False, "returned": 0}, + "prs": {"total": 0, "total_pages": 1, "has_next_page": False, "returned": 0}, + "issues": {"total": 0, "total_pages": 1, "has_next_page": False, "returned": 0}, + "repos": {"total_in_org": 0, "with_user_activity": 0} } }