homelabby/internal/ai/client.go

package ai

import (
	"context"
	"encoding/json"
	"fmt"
	"time"

	openai "github.com/sashabaranov/go-openai"

	"git.georgsen.dk/hwlab/internal/ai/prompts"
)

// AIClient is the single abstraction over any OpenAI-compatible inference backend.
// Code that needs photo analysis depends on this interface rather than on a
// concrete client, so the backend can be swapped without touching callers.
//
// Production: TierClient wrapping sashabaranov/go-openai.
// Tests: a mock with canned responses (the original note names MockAIClient;
// it is not defined in this file — confirm where it lives).
type AIClient interface {
	// AnalyzePhotos submits the photos carried by req for analysis and returns
	// the resulting IntakeResult, or an error if the analysis could not be run.
	AnalyzePhotos(ctx context.Context, req IntakeRequest) (*IntakeResult, error)
}

// TierClient is the production AIClient backed by go-openai.
// Each instance binds one go-openai client to a single model name and a
// per-request timeout.
type TierClient struct {
	// client is the go-openai handle used for every chat completion call.
	client  *openai.Client
	// model is the model name sent with each completion request.
	model   string
	// timeout bounds each request; it is applied through context.WithTimeout.
	timeout time.Duration
}

// NewTierClient creates a TierClient from a TierConfig.
//
// cfg.APIKey seeds the go-openai default configuration, and cfg.BaseURL is
// set directly on the openai.ClientConfig — this is the tier-routing
// mechanism. BaseURL is assigned as-is, with no validation or fallback.
//
// cfg.TimeoutSeconds becomes the per-request timeout. Zero or negative values
// fall back to 30 seconds: a non-positive duration handed to
// context.WithTimeout produces a context that is already expired, which would
// make every request fail immediately.
func NewTierClient(cfg TierConfig) *TierClient {
	const defaultTimeout = 30 * time.Second

	oCfg := openai.DefaultConfig(cfg.APIKey)
	oCfg.BaseURL = cfg.BaseURL

	timeout := time.Duration(cfg.TimeoutSeconds) * time.Second
	if timeout <= 0 {
		timeout = defaultTimeout
	}

	return &TierClient{
		client:  openai.NewClientWithConfig(oCfg),
		model:   cfg.Model,
		timeout: timeout,
	}
}

// AnalyzePhotos sends the photos in req to the configured model as a single
// multi-part user message and parses the structured JSON reply into an
// IntakeResult.
//
// The message is the intake prompt (built for the number of photos supplied)
// followed by one image part per entry of req.PhotosBase64. The call is
// bounded by the client's timeout in addition to any deadline already on ctx.
//
// An error is returned when req carries no photos, when the completion call
// fails, or when the response contains no choices. A reply that is not valid
// JSON is deliberately NOT an error: a zero-confidence IntakeResult carrying
// the parse error and the first 200 characters of the raw reply is returned
// instead, so the orchestrator can escalate.
func (c *TierClient) AnalyzePhotos(ctx context.Context, req IntakeRequest) (*IntakeResult, error) {
	// A vision request without any image cannot produce a meaningful intake
	// result; reject it up front rather than spending a model call on it.
	if len(req.PhotosBase64) == 0 {
		return nil, fmt.Errorf("analyze photos: no photos provided")
	}

	// Build vision message parts: text prompt first, then image URLs.
	// The final length is known, so size the slice once.
	parts := make([]openai.ChatMessagePart, 0, 1+len(req.PhotosBase64))
	parts = append(parts, openai.ChatMessagePart{
		Type: openai.ChatMessagePartTypeText,
		Text: buildIntakePromptWithCount(len(req.PhotosBase64)),
	})
	// NOTE(review): each entry is passed through verbatim as the image URL.
	// Presumably callers supply a complete "data:<mime>;base64,..." URI rather
	// than bare base64 — confirm against the code that fills IntakeRequest.
	for _, b64 := range req.PhotosBase64 {
		parts = append(parts, openai.ChatMessagePart{
			Type: openai.ChatMessagePartTypeImageURL,
			ImageURL: &openai.ChatMessageImageURL{
				URL:    b64,
				Detail: openai.ImageURLDetailAuto,
			},
		})
	}

	tctx, cancel := context.WithTimeout(ctx, c.timeout)
	defer cancel()

	resp, err := c.client.CreateChatCompletion(tctx, openai.ChatCompletionRequest{
		Model: c.model,
		Messages: []openai.ChatCompletionMessage{
			{Role: openai.ChatMessageRoleUser, MultiContent: parts},
		},
	})
	if err != nil {
		return nil, fmt.Errorf("chat completion: %w", err)
	}
	if len(resp.Choices) == 0 {
		// Prefixed like the other errors from this call so log lines stay greppable.
		return nil, fmt.Errorf("chat completion: no choices in response")
	}

	content := resp.Choices[0].Message.Content
	var result IntakeResult
	if err := json.Unmarshal([]byte(content), &result); err != nil {
		// JSON parse failure — return a fresh zero-confidence result (never the
		// partially decoded one) so the orchestrator escalates.
		return &IntakeResult{
			AINotes:        fmt.Sprintf("JSON parse failed: %v | raw: %.200s", err, content),
			Confidence:     0.0,
			ConfidenceNote: "model returned non-JSON response",
		}, nil
	}
	return &result, nil
}

// TextComplete runs a single-turn, text-only chat completion against the
// configured model: prompt is sent as one user message with no image parts.
// Per the original note it serves the research agent's hardware enrichment
// prompts, which need no images.
//
// The call is bounded by the client's timeout on top of any deadline already
// carried by ctx. On success the content of the first response choice is
// returned unmodified; a failed call or a response without choices yields an
// error and an empty string.
func (c *TierClient) TextComplete(ctx context.Context, prompt string) (string, error) {
	request := openai.ChatCompletionRequest{
		Model: c.model,
		Messages: []openai.ChatCompletionMessage{{
			Role:    openai.ChatMessageRoleUser,
			Content: prompt,
		}},
	}

	deadlineCtx, stop := context.WithTimeout(ctx, c.timeout)
	defer stop()

	completion, callErr := c.client.CreateChatCompletion(deadlineCtx, request)
	switch {
	case callErr != nil:
		return "", fmt.Errorf("text complete: %w", callErr)
	case len(completion.Choices) == 0:
		return "", fmt.Errorf("text complete: no choices in response")
	}

	return completion.Choices[0].Message.Content, nil
}

// buildIntakePromptWithCount forwards n to prompts.BuildIntakePrompt and
// returns its result unchanged. It exists so code in this package reaches the
// prompt builder through one package-internal name.
func buildIntakePromptWithCount(n int) string {
	prompt := prompts.BuildIntakePrompt(n)
	return prompt
}