Enhance review with --output-format=json for GitHub Actions!

TechNickAI · TechNickAI · commit f1851d2280a2 · 2023-07-12T01:28:22.000-07:00
The code review command now supports structured JSON output via `--output-format json`, and the response token size is configurable via `-t`/`--response-token-size`. The review prompt has been updated to provide more detailed instructions, including the possible review statuses. Test coverage has been extended to cover the new functionality. Additionally, the `json` module is now imported in `cli.py` to handle JSON output.
diff --git a/aicodebot/cli.py b/aicodebot/cli.py
@@ -13,7 +13,7 @@
 from rich.live import Live
 from rich.markdown import Markdown
 from rich.style import Style
-import click, datetime, openai, os, random, subprocess, sys, tempfile, webbrowser, yaml
+import click, datetime, json, openai, os, random, subprocess, sys, tempfile, webbrowser, yaml
 
 # ----------------------------- Default settings ----------------------------- #
 
@@ -327,7 +327,9 @@ def fun_fact(verbose, response_token_size):
 @cli.command
 @click.option("-c", "--commit", help="The commit hash to review (otherwise look at [un]staged changes).")
 @click.option("-v", "--verbose", count=True)
-def review(commit, verbose):
+@click.option("--output-format", default="text", type=click.Choice(["text", "json"], case_sensitive=False))
+@click.option("-t", "--response-token-size", type=int, default=DEFAULT_MAX_TOKENS * 2)
+def review(commit, verbose, output_format, response_token_size):
     """Do a code review, with [un]staged changes, or a specified commit."""
     setup_config()
 
@@ -337,7 +339,7 @@ def review(commit, verbose):
         sys.exit(0)
 
     # Load the prompt
-    prompt = get_prompt("review")
+    prompt = get_prompt("review", structured_output=output_format == "json")
     logger.trace(f"Prompt: {prompt}")
 
     # Check the size of the diff context and adjust accordingly
@@ -347,19 +349,27 @@ def review(commit, verbose):
     if model_name is None:
         raise click.ClickException(f"The diff is too large to review ({request_token_size} tokens). 😢")
 
-    with Live(Markdown(""), auto_refresh=True) as live:
-        llm = Coder.get_llm(
-            model_name,
-            verbose,
-            response_token_size=response_token_size,
-            streaming=True,
-            callbacks=[RichLiveCallbackHandler(live, bot_style)],
-        )
+    llm = Coder.get_llm(model_name, verbose, response_token_size, streaming=True)
+    chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
 
-        # Set up the chain
-        chain = LLMChain(llm=llm, prompt=prompt, verbose=verbose)
+    if output_format == "json":
+        with console.status("Examining the diff and generating the review", spinner=DEFAULT_SPINNER):
+            response = chain.run(diff_context)
+
+        parsed_response = prompt.output_parser.parse(response)
+        data = {"review_status": parsed_response.review_status, "review_comments": parsed_response.review_comments}
+        if commit:
+            data["commit"] = commit
+        json_response = json.dumps(data, indent=4)
+        print(json_response)  # noqa: T201
+
+    else:
+        # Stream live
+        with Live(Markdown(""), auto_refresh=True) as live:
+            llm.streaming = True
+            llm.callbacks = [RichLiveCallbackHandler(live, bot_style)]
 
-        chain.run(diff_context)
+            chain.run(diff_context)
 
 
 @cli.command
diff --git a/aicodebot/prompts.py b/aicodebot/prompts.py
@@ -2,7 +2,9 @@
 from aicodebot.config import read_config
 from aicodebot.helpers import logger
 from langchain import PromptTemplate
+from langchain.output_parsers import PydanticOutputParser
 from pathlib import Path
+from pydantic import BaseModel, Field
 from types import SimpleNamespace
 import functools, os
 
@@ -266,47 +268,73 @@ def generate_files_context(files):
     + get_personality_prompt()
     + """
 
-    DO NOT give comments that discuss formatting, as those will be handled with pre-commit hooks.
-    DO NOT respond with line numbers, use function names or file names instead.
-
     Here's the diff context:
 
     BEGIN DIFF
     {diff_context}
     END DIFF
 
     Remember:
-    - Lines starting with "-" are being removed.
-    - Lines starting with "+" are being added.
-    - Lines starting with " " are unchanged.
+    * Lines starting with "-" are being removed.
+    * Lines starting with "+" are being added.
+    * Lines starting with " " are unchanged.
+    * Consider the file names for context (e.g., "README.md" is a markdown file, "*.py" is a Python file).
+    * Understand the difference between code and comments. Comment lines start with ##, #, or //.
+    * Point out obvious spelling mistakes in plain text files if you see them, but don't check for spelling in code.
+    * Do not talk about minor changes. It's better to be terse and focus on issues.
+    * Do not talk about formatting, as that will be handled with pre-commit hooks.
+
+    The main focus is to tell the developer how to make the code better.
+
+    The review_status can be one of the following:
+    * "PASSED" (looks good to me) - there were no serious issues found,
+    * "COMMENTS" - there were some issues found, but they should not block the build and are informational only
+    * "FAILED" - there were serious, blocking issues found that should be fixed before merging the code
+
+    The review_message should be a markdown-formatted string for display with rich.Markdown or GitHub markdown.
+"""
+)
 
-    Consider the file names for context (e.g., "README.md" is a markdown file, "*.py" is a Python file).
-    Understand the difference between code and comments. Comment lines start with ##, #, or //.
 
-    The main focus is to tell me how I could make the code better.
+def get_prompt(command, structured_output=False):
+    """Generates a prompt for the sidekick workflow."""
 
-    Point out spelling mistakes in plain text files if you see them, but don't try to spell
-    function and variable names correctly.
+    if command == "review":
+        if structured_output:
+            parser = PydanticOutputParser(pydantic_object=ReviewResult)
+            return PromptTemplate(
+                template=REVIEW_TEMPLATE + "\n{format_instructions}",
+                input_variables=["diff_context"],
+                partial_variables={"format_instructions": parser.get_format_instructions()},
+                output_parser=parser,
+            )
+        else:
+            return PromptTemplate(
+                template=REVIEW_TEMPLATE + "\nRespond in markdown format", input_variables=["diff_context"]
+            )
 
-    If the changes look good overall and don't require any feedback, then just respond with "LGTM" (looks good to me).
+    else:
+        prompt_map = {
+            "alignment": PromptTemplate(template=ALIGNMENT_TEMPLATE, input_variables=[]),
+            "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context"]),
+            "debug": PromptTemplate(template=DEBUG_TEMPLATE, input_variables=["command_output"]),
+            "fun_fact": PromptTemplate(template=FUN_FACT_TEMPLATE, input_variables=["topic"]),
+            "sidekick": PromptTemplate(template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context"]),
+        }
 
-    Respond in markdown format.
-"""
-)
+        try:
+            return prompt_map[command]
+        except KeyError as e:
+            raise ValueError(f"Unable to find prompt for command {command}") from e
 
 
-def get_prompt(command):
-    """Generates a prompt for the sidekick workflow."""
-    prompt_map = {
-        "alignment": PromptTemplate(template=ALIGNMENT_TEMPLATE, input_variables=[]),
-        "commit": PromptTemplate(template=COMMIT_TEMPLATE, input_variables=["diff_context"]),
-        "debug": PromptTemplate(template=DEBUG_TEMPLATE, input_variables=["command_output"]),
-        "fun_fact": PromptTemplate(template=FUN_FACT_TEMPLATE, input_variables=["topic"]),
-        "review": PromptTemplate(template=REVIEW_TEMPLATE, input_variables=["diff_context"]),
-        "sidekick": PromptTemplate(template=SIDEKICK_TEMPLATE, input_variables=["chat_history", "task", "context"]),
-    }
-
-    try:
-        return prompt_map[command]
-    except KeyError as e:
-        raise ValueError(f"Unable to find prompt for command {command}") from e
+# ---------------------------------------------------------------------------- #
+#                                Output Parsers                                #
+# ---------------------------------------------------------------------------- #
+
+
+class ReviewResult(BaseModel):
+    """Review result from the sidekick."""
+
+    review_status: str = Field(description="The status of the review: PASSED, COMMENTS, or FAILED")
+    review_comments: str = Field(description="The comments from the review")
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -5,7 +5,7 @@
 from aicodebot.prompts import DEFAULT_PERSONALITY
 from git import Repo
 from pathlib import Path
-import os, pytest
+import json, os, pytest
 
 
 @pytest.mark.skipif(not os.getenv("OPENAI_API_KEY"), reason="Skipping live tests without an API key.")
@@ -109,12 +109,20 @@ def test_review(cli_runner, temp_git_repo):
         repo.git.add("test.txt")
 
         # Run the review command
-        result = cli_runner.invoke(cli, ["review"])
+        result = cli_runner.invoke(cli, ["review", "-t", "100"])
 
         # Check that the review command ran successfully
         assert result.exit_code == 0
         assert len(result.output) > 20
 
+        # Again with json output
+        result = cli_runner.invoke(cli, ["review", "-t", "100", "--output-format", "json"])
+
+        assert result.exit_code == 0
+        # Check if it's valid json
+        parsed = json.loads(result.output)
+        assert parsed["review_status"] == "PASSED"
+
 
 @pytest.mark.skipif(not os.getenv("OPENAI_API_KEY"), reason="Skipping live tests without an API key.")
 def test_sidekick(cli_runner):