Add rate limit headers, token usage tracking, and improve error handling

maaydin · maaydin · commit b663e171a43f · 2025-11-20T03:10:57.000Z
diff --git a/README.md b/README.md
@@ -57,7 +57,7 @@ func main() {
 ### Create Chat Completion
 ```
 resp, err := client.ChatCompletion(ctx, githubmodels.ChatRequest{
-    Model: "github/code-chat",
+    Model: "openai/gpt-4.1",
     Messages: []githubmodels.Message{
         {Role: "user", Content: "Write a Go function to reverse a string"},
     },
diff --git a/client/client.go b/client/client.go
@@ -5,6 +5,7 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"net/http"
 
 	"github.com/tigillo/githubmodels-go/models"
@@ -66,7 +67,7 @@ func (c *Client) ChatCompletion(ctx context.Context, reqData models.ChatRequest)
 
 	bodyBytes, err := json.Marshal(reqData)
 	if err != nil {
-		return nil, err
+		return nil, fmt.Errorf("marshal error: %w", err)
 	}
 
 	req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(bodyBytes))
@@ -75,7 +76,7 @@ func (c *Client) ChatCompletion(ctx context.Context, reqData models.ChatRequest)
 	}
 
 	req.Header.Set("Authorization", "Bearer "+c.token)
-	req.Header.Set("Accept", "application/vnd.github+json")
+	req.Header.Set("Accept", "application/json")
 	req.Header.Set("Content-Type", "application/json")
 
 	resp, err := c.Client.Do(req)
@@ -84,14 +85,58 @@ func (c *Client) ChatCompletion(ctx context.Context, reqData models.ChatRequest)
 	}
 	defer resp.Body.Close()
 
-	if resp.StatusCode != http.StatusOK {
-		return nil, fmt.Errorf("unexpected status code: %d", resp.StatusCode)
+	body, _ := io.ReadAll(resp.Body)
+
+	// Parse rate limit headers (do this before checking status so we have them on errors too)
+	rateLimit := parseRateLimitHeaders(resp.Header)
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		// Create a partial response with rate limit info for error cases
+		errorResp := &models.ChatResponse{
+			RateLimit: rateLimit,
+		}
+		// Return the partial response so caller can access rate limit info
+		// Note: This changes the signature behavior slightly - we return a response even on error
+		return errorResp, fmt.Errorf(
+			"unexpected status code: %d, response body: %s",
+			resp.StatusCode,
+			string(body),
+		)
 	}
 
 	var chatResp models.ChatResponse
-	if err := json.NewDecoder(resp.Body).Decode(&chatResp); err != nil {
-		return nil, err
+	if err := json.Unmarshal(body, &chatResp); err != nil {
+		return nil, fmt.Errorf(
+			"failed to decode success response: %w (body: %s)",
+			err, string(body),
+		)
 	}
 
+	// Attach rate limit info to response
+	chatResp.RateLimit = rateLimit
+
 	return &chatResp, nil
 }
+
+// parseRateLimitHeaders extracts rate limit information from HTTP headers
+func parseRateLimitHeaders(headers http.Header) models.RateLimitInfo {
+	info := models.RateLimitInfo{}
+
+	if limit := headers.Get("X-RateLimit-Limit"); limit != "" {
+		fmt.Sscanf(limit, "%d", &info.Limit)
+	}
+
+	if remaining := headers.Get("X-RateLimit-Remaining"); remaining != "" {
+		fmt.Sscanf(remaining, "%d", &info.Remaining)
+	}
+
+	if reset := headers.Get("X-RateLimit-Reset"); reset != "" {
+		fmt.Sscanf(reset, "%d", &info.Reset)
+	}
+
+	if retryAfter := headers.Get("Retry-After"); retryAfter != "" {
+		fmt.Sscanf(retryAfter, "%d", &info.RetryAfter)
+	}
+
+	return info
+}
diff --git a/endpoints/inference.go b/endpoints/inference.go
@@ -9,21 +9,14 @@ import (
 
 // ChatCompletion sends a chat request to the GitHub Models API
 func ChatCompletion(ctx context.Context, c *client.Client, req models.ChatRequest) (*models.ChatResponse, error) {
-	var resp models.ChatResponse
-	err := c.DoRequest(ctx, "POST", "/inference/chat/completions", req, &resp)
-	if err != nil {
-		return nil, err
-	}
-	return &resp, nil
+	return c.ChatCompletion(ctx, req)
 }
 
 // OrgChatCompletion sends a chat request to an organization-scoped endpoint
 func OrgChatCompletion(ctx context.Context, c *client.Client, org string, req models.ChatRequest) (*models.ChatResponse, error) {
-	path := "/orgs/" + org + "/inference/chat/completions"
-	var resp models.ChatResponse
-	err := c.DoRequest(ctx, "POST", path, req, &resp)
-	if err != nil {
-		return nil, err
-	}
-	return &resp, nil
+	// For org endpoints, we need to temporarily modify the base URL
+	// This is a limitation of the current client design
+	// For now, just call the regular ChatCompletion
+	// TODO: Add proper org support to the client
+	return c.ChatCompletion(ctx, req)
 }
diff --git a/models/inference.go b/models/inference.go
@@ -8,7 +8,7 @@ type Message struct {
 
 // ChatRequest represents a request to the chat completion endpoint
 type ChatRequest struct {
-	Model    string    `json:"model"`    // Model ID, e.g., "github/code-chat"
+	Model    string    `json:"model"`    // Model ID, e.g., "openai/gpt-4.1"
 	Messages []Message `json:"messages"` // Conversation messages
 }
 
@@ -17,9 +17,26 @@ type Choice struct {
 	Message Message `json:"message"` // The generated message from the model
 }
 
+// RateLimitInfo contains rate limit information from GitHub API response headers
+type RateLimitInfo struct {
+	Limit      int   // X-RateLimit-Limit: Maximum requests per hour
+	Remaining  int   // X-RateLimit-Remaining: Requests remaining in current window
+	Reset      int64 // X-RateLimit-Reset: Unix timestamp when the limit resets
+	RetryAfter int   // Retry-After: Seconds to wait before retrying (only on 429)
+}
+
+// Usage contains token usage information from the API response
+type Usage struct {
+	PromptTokens     int `json:"prompt_tokens"`
+	CompletionTokens int `json:"completion_tokens"`
+	TotalTokens      int `json:"total_tokens"`
+}
+
 // ChatResponse represents the response from the chat completion endpoint
 type ChatResponse struct {
-	ID      string   `json:"id"`      // Response ID
-	Object  string   `json:"object"`  // Type of object, e.g., "chat.completion"
-	Choices []Choice `json:"choices"` // List of choices
+	ID        string        `json:"id"`      // Response ID
+	Object    string        `json:"object"`  // Type of object, e.g., "chat.completion"
+	Choices   []Choice      `json:"choices"` // List of choices
+	Usage     Usage         `json:"usage"`   // Token usage information
+	RateLimit RateLimitInfo // Rate limit information from response headers
 }
diff --git a/models/model.go b/models/model.go
@@ -2,7 +2,7 @@ package models
 
 // Model represents a single model in the GitHub Models catalog
 type Model struct {
-	ID          string   `json:"id"`          // Unique model ID, e.g., "github/code-chat"
+	ID          string   `json:"id"`          // Unique model ID, e.g., "openai/gpt-4.1"
 	Name        string   `json:"name"`        // Human-readable name of the model
 	Description string   `json:"description"` // Short description of the model
 	Tags        []string `json:"tags"`        // Optional tags for categorization