utensils
diff --git a/mcp_nixos/server.py b/mcp_nixos/server.py
Lines changed: 996 additions & 163 deletions
diff --git a/tests/test_channels.py b/tests/test_channels.py
Lines changed: 6 additions & 6 deletions
diff --git a/tests/test_discussions.py b/tests/test_discussions.py
Lines changed: 3 additions & 3 deletions
diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py
Lines changed: 17 additions & 17 deletions
diff --git a/tests/test_evals.py b/tests/test_evals.py
Lines changed: 35 additions & 30 deletions
@@ -159,10 +159,10 @@ async def test_channels_tool(self, mock_resolved, mock_discover):
 
         result = await channels()
 
-        assert "NixOS Channels" in result  # Match both old and new format
-        assert "unstable → latest-43-nixos-unstable" in result or "unstable \u2192 latest-43-nixos-unstable" in result
+        assert "CHANNELS: Available" in result  # Match the new plain-text (ASCII) format
+        assert "unstable -> latest-43-nixos-unstable" in result
         assert "stable" in result and "latest-43-nixos-25.05" in result
-        assert "✓ Available" in result
+        assert "[Available]" in result
         assert "151,798 documents" in result
 
     @patch("mcp_nixos.server.channel_cache.get_available")
@@ -180,8 +180,8 @@ async def test_channels_with_unavailable(self, mock_resolved, mock_discover):
 
         result = await channels()
 
-        assert "✓ Available" in result
-        assert "✗ Unavailable" in result
+        assert "[Available]" in result
+        assert "[Unavailable]" in result
 
     @patch("mcp_nixos.server.channel_cache.get_available")
     @pytest.mark.asyncio
@@ -469,7 +469,7 @@ async def test_stats_with_dynamic_channels(self, mock_resolve):
             # Should work with new stable
             result = await stats("stable")
             # Should not error and should contain statistics
-            assert "NixOS Statistics" in result
+            assert "STATS:" in result
             assert "stable" in result
             # Should have made API calls
             assert mock_post.called
 
@@ -368,8 +368,8 @@ async def test_help_includes_discussion_tools(self):
         help_fn = get_tool_function("help")
         result = await help_fn()
 
-        assert "COMMUNITY & DISCUSSIONS" in result
+        assert "COMMUNITY & HELP" in result
         assert "discourse-search" in result
         assert "github-search" in result
-        assert "Search NixOS forum" in result
-        assert "Search GitHub issues/PRs" in result
+        assert "Search forum" in result
+        assert "Search issues/PRs" in result
@@ -176,19 +176,19 @@ async def test_search_invalid_parameters(self):
         """Test search with various invalid parameters."""
         # Invalid type
         result = await search("test", search_type="invalid")
-        assert "Error (ERROR): Invalid type 'invalid'" in result
+        assert "Error (INVALID_TYPE): Invalid type 'invalid'" in result
 
         # Invalid channel
         result = await search("test", channel="nonexistent")
-        assert "Error (ERROR): Invalid channel 'nonexistent'" in result
+        assert "Error (ERROR): Invalid channel 'nonexistent'." in result  # Note the period
 
         # Invalid limit (too low)
         result = await search("test", limit=0)
-        assert "Error (ERROR): Limit must be 1-100" in result
+        assert "Error (INVALID_LIMIT): Limit must be 1-100" in result
 
         # Invalid limit (too high)
         result = await search("test", limit=101)
-        assert "Error (ERROR): Limit must be 1-100" in result
+        assert "Error (INVALID_LIMIT): Limit must be 1-100" in result
 
     @patch("mcp_nixos.server.es_query")
     @pytest.mark.asyncio
@@ -209,7 +209,7 @@ async def test_search_programs_edge_case(self, mock_es_query):
 
         # Search for 'ls' should find it in programs
         result = await search("ls", search_type="programs")
-        assert "ls (provided by coreutils)" in result
+        assert "• ls -> provided by coreutils" in result
 
         # Search for 'grep' should not show coreutils
         result = await search("grep", search_type="programs")
@@ -230,7 +230,7 @@ async def test_show_with_missing_fields(self, mock_es_query):
         ]
 
         result = await show("minimal-pkg", type="package")
-        assert "Package: minimal-pkg" in result
+        assert "Name: minimal-pkg" in result
         assert "Version: " in result  # Empty version
         # Should not crash on missing fields
 
@@ -280,10 +280,10 @@ async def test_hm_search_edge_cases(self):
         """Test hm_search with edge cases."""
         # Invalid limit
         result = await hm_search("test", limit=0)
-        assert "Error (ERROR): Limit must be 1-100" in result
+        assert "Error (INVALID_LIMIT): Limit must be 1-100" in result
 
         result = await hm_search("test", limit=101)
-        assert "Error (ERROR): Limit must be 1-100" in result
+        assert "Error (INVALID_LIMIT): Limit must be 1-100" in result
 
     @patch("mcp_nixos.server.parse_html_options")
     @pytest.mark.asyncio
@@ -345,29 +345,29 @@ async def test_all_tools_handle_exceptions_gracefully(self):
         """Test that all tools handle exceptions and return error messages."""
         with patch("requests.post", side_effect=Exception("Network error")):
             result = await search("test")
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await show("test")
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await stats()
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
         with patch("requests.get", side_effect=Exception("Network error")):
             result = await hm_search("test")
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await hm_show("test")
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await hm_options()
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await darwin_search("test")
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await darwin_show("test")
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
 
             result = await darwin_options()
-            assert "Error (ERROR):" in result
+            assert "Error (" in result
@@ -141,12 +141,13 @@ async def test_complete_firefox_installation_flow(self, mock_get, mock_post):
         # Execute the flow
         # 1. Search for Firefox
         result1 = await search("firefox")
-        assert "Found 1 packages matching 'firefox':" in result1
+        assert "SEARCH: packages" in result1
+        assert "Results: 1 packages found" in result1
         assert "• firefox (121.0)" in result1
 
         # 2. Get detailed info
         result2 = await show("firefox")
-        assert "Package: firefox" in result2
+        assert "Name: firefox" in result2
         assert "Homepage: https://www.mozilla.org/firefox/" in result2
 
         # 3. Check Home Manager options
@@ -217,14 +218,14 @@ async def _make_tool_call(self, tool_name: str, **kwargs) -> str:
             "search": search,
             "show": show,
             "stats": stats,
-            "hm_search": hm_search,
-            "hm_show": hm_show,
-            "hm_options": hm_options,
-            "hm_browse": hm_browse,
-            "darwin_search": darwin_search,
-            "darwin_show": darwin_show,
-            "darwin_options": darwin_options,
-            "darwin_browse": darwin_browse,
+            "hm-search": hm_search,
+            "hm-show": hm_show,
+            "hm-options": hm_options,
+            "hm-browse": hm_browse,
+            "darwin-search": darwin_search,
+            "darwin-show": darwin_show,
+            "darwin-options": darwin_options,
+            "darwin-browse": darwin_browse,
         }
 
         if tool_name in tools:
@@ -269,20 +270,20 @@ async def _handle_home_manager_query(self, query: str):
         if "git" in query.lower():
             # Search both system and user options
             await self._make_tool_call("search", query="git", search_type="packages")
-            await self._make_tool_call("hm_search", query="programs.git")
-            await self._make_tool_call("hm_show", name="programs.git.enable")
+            await self._make_tool_call("hm-search", query="programs.git")
+            await self._make_tool_call("hm-show", name="programs.git.enable")
         elif "shell" in query.lower():
             # Handle shell configuration queries
-            await self._make_tool_call("hm_search", query="programs.zsh")
-            await self._make_tool_call("hm_show", name="programs.zsh.enable")
-            await self._make_tool_call("hm_browse", option_prefix="programs.zsh")
+            await self._make_tool_call("hm-search", query="programs.zsh")
+            await self._make_tool_call("hm-show", name="programs.zsh.enable")
+            await self._make_tool_call("hm-browse", option_prefix="programs.zsh")
 
     async def _handle_darwin_query(self, query: str):
         """Handle Darwin/macOS queries."""
         if "dock" in query.lower():
-            await self._make_tool_call("darwin_search", query="system.defaults.dock")
-            await self._make_tool_call("darwin_show", name="system.defaults.dock.autohide")
-            await self._make_tool_call("darwin_browse", option_prefix="system.defaults.dock")
+            await self._make_tool_call("darwin-search", query="system.defaults.dock")
+            await self._make_tool_call("darwin-show", name="system.defaults.dock.autohide")
+            await self._make_tool_call("darwin-browse", option_prefix="system.defaults.dock")
 
     async def _handle_comparison_query(self, query: str):
         """Handle package comparison queries."""
@@ -344,8 +345,12 @@ def _check_criteria(self, scenario: EvalScenario, tool_calls: list[tuple[str, di
 
         for criterion in scenario.success_criteria:
             if "finds" in criterion and "package" in criterion:
-                # Check if package was found
-                criteria_met[criterion] = any("Found" in call[2] and "packages" in call[2] for call in tool_calls)
+                # Check if package was found (new format)
+                criteria_met[criterion] = any(
+                    ("Results:" in call[2] and "packages found" in call[2])
+                    or ("Found" in call[2] and "packages" in call[2])  # Support old format too
+                    for call in tool_calls
+                )
             elif "mentions" in criterion:
                 # Check if certain text is mentioned
                 key_term = criterion.split("mentions")[1].strip()
@@ -626,8 +631,8 @@ async def test_eval_user_vs_system_config(self, mock_parse, mock_query):
             user_query="Should I configure git in NixOS or Home Manager?",
             expected_tool_calls=[
                 "await search(query='git', search_type='packages')",
-                "await hm_search(query='programs.git')",
-                "await hm_show(name='programs.git.enable')",
+                "await hm-search(query='programs.git')",
+                "await hm-show(name='programs.git.enable')",
             ],
             success_criteria=[
                 "explains system vs user configuration",
@@ -640,7 +645,7 @@ async def test_eval_user_vs_system_config(self, mock_parse, mock_query):
         result = await self.framework.run_eval(scenario)
 
         assert len(result.tool_calls_made) >= 3
-        assert any("hm_" in call[0] for call in result.tool_calls_made)
+        assert any("hm-" in call[0] for call in result.tool_calls_made)
 
     @patch("mcp_nixos.server.parse_html_options")
     @pytest.mark.asyncio
@@ -655,9 +660,9 @@ async def test_eval_dotfiles_management(self, mock_parse):
             name="shell_config",
             user_query="How do I manage my shell configuration with Home Manager?",
             expected_tool_calls=[
-                "await hm_search(query='programs.zsh')",
-                "await hm_show(name='programs.zsh.enable')",
-                "await hm_browse(option_prefix='programs.zsh')",
+                "await hm-search(query='programs.zsh')",
+                "await hm-show(name='programs.zsh.enable')",
+                "await hm-browse(option_prefix='programs.zsh')",
             ],
             success_criteria=[
                 "enables shell program",
@@ -691,9 +696,9 @@ async def test_eval_macos_dock_settings(self, mock_parse):
             name="macos_dock_config",
             user_query="How do I configure dock settings with nix-darwin?",
             expected_tool_calls=[
-                "await darwin_search(query='system.defaults.dock')",
-                "await darwin_show(name='system.defaults.dock.autohide')",
-                "await darwin_browse(option_prefix='system.defaults.dock')",
+                "await darwin-search(query='system.defaults.dock')",
+                "await darwin-show(name='system.defaults.dock.autohide')",
+                "await darwin-browse(option_prefix='system.defaults.dock')",
             ],
             success_criteria=[
                 "finds dock configuration options",
@@ -804,7 +809,7 @@ async def test_run_all_evals(self):
             EvalScenario(
                 name="home_manager_usage",
                 user_query="Should I use Home Manager for git config?",
-                expected_tool_calls=["await hm_search(query='git')"],
+                expected_tool_calls=["await hm-search(query='git')"],
                 success_criteria=["recommends Home Manager"],
             ),
         ]