homelabby/internal/research/agent.go
Mikkel Georgsen 0072aa41bd feat(07-01): ResearchAgent worker, trigger endpoint, main.go wiring
- internal/research/agent.go: Agent with RunOnce+Start, sanitizeQuery, interface adapters
- internal/research/agent_test.go: stub-based unit tests (sanitize, enrich, skip, empty)
- internal/ai/client.go: TierClient.TextComplete for text-only LLM calls
- internal/api/handlers/research.go: POST /api/research/trigger handler (202 Accepted)
- internal/api/router.go: researchHandler param + /api/research/trigger route
- cmd/hwlab/main.go: research agent goroutine started with 10min interval
2026-04-10 07:51:13 +00:00

185 lines
6 KiB
Go

package research
import (
"context"
"encoding/json"
"fmt"
"log"
"regexp"
"strings"
"time"
"git.georgsen.dk/hwlab/internal/ai"
"git.georgsen.dk/hwlab/internal/inventory"
"git.georgsen.dk/hwlab/internal/netbox"
)
// nonSafeChars matches every run of characters that must not be forwarded to
// SearXNG. Only ASCII letters, digits, space, dot, dash and underscore survive.
var nonSafeChars = regexp.MustCompile(`[^a-zA-Z0-9 .\-_]+`)

// SanitizeQuery returns s with each run of disallowed characters collapsed into
// a single space and with leading and trailing whitespace removed.
// It is exported so that the external _test package can exercise it.
func SanitizeQuery(s string) string {
	return strings.TrimSpace(nonSafeChars.ReplaceAllString(s, " "))
}
// NetBoxer is the subset of netbox.Client that the Agent depends on.
// Declaring it as an interface lets tests inject a stub instead of a live client.
type NetBoxer interface {
// ListDevicesWithStatus is called by RunOnce with inventory.StatusNeedsResearch
// (converted to a string) to obtain the devices to enrich.
ListDevicesWithStatus(ctx context.Context, status string) ([]netbox.Device, error)
// PatchCustomFields is called by RunOnce with a patch holding the "ai_notes"
// and/or "product_url" keys for the device identified by deviceID.
PatchCustomFields(ctx context.Context, deviceID int64, patch map[string]interface{}) error
}
// TextCompleter is the subset of ai.TierClient used by the Agent for text-only LLM calls.
type TextCompleter interface {
// TextComplete receives the enrichment prompt built by RunOnce and returns the
// model's raw reply, which RunOnce then tries to decode as JSON.
TextComplete(ctx context.Context, prompt string) (string, error)
}
// CatalogTransitioner is the subset of inventory.CatalogUpdater used by the Agent.
type CatalogTransitioner interface {
// UpdateCatalogStatus is called by RunOnce with current set to
// inventory.StatusNeedsResearch and next set to inventory.StatusResearched.
// RunOnce only inspects the returned error; the returned status is discarded.
UpdateCatalogStatus(ctx context.Context, deviceID int64, current, next inventory.CatalogStatus) (inventory.CatalogStatus, error)
}
// Agent is the background worker that enriches needs_research hardware items.
// It polls NetBox, searches SearXNG, calls a Tier 2 LLM, and transitions items to researched.
type Agent struct {
// nbClient lists the devices awaiting research and receives the custom-field patches.
nbClient NetBoxer
// researchClient runs the search for each sanitized device name.
researchClient ai.ResearchClient
// llm turns the search results into the enrichment reply.
llm TextCompleter
// updater moves a device from needs_research to researched once it is patched.
updater CatalogTransitioner
}
// NewAgent wires the four collaborators into a freshly allocated Agent.
// Callers must supply non-nil values for every argument; the constructor
// stores them as given and performs no validation of its own.
func NewAgent(nb NetBoxer, rc ai.ResearchClient, llm TextCompleter, updater CatalogTransitioner) *Agent {
	agent := new(Agent)
	agent.nbClient = nb
	agent.researchClient = rc
	agent.llm = llm
	agent.updater = updater
	return agent
}
// enrichmentResponse is the expected JSON structure from the Tier 2 LLM.
type enrichmentResponse struct {
// AINotes is written to the "ai_notes" custom field when non-empty.
AINotes string `json:"ai_notes"`
// ProductURL is written to the "product_url" custom field; when the LLM leaves
// it empty RunOnce substitutes the URL of the first search result.
ProductURL string `json:"product_url"`
}
// RunOnce performs a single research cycle: it lists all needs_research
// devices, enriches each one via search + LLM, patches the NetBox custom
// fields, and transitions the catalog status to researched.
//
// Failures that concern a single device (search, LLM, patch, or transition
// errors, empty names, no search results) are logged and the device is skipped;
// it stays in needs_research and is picked up again by a later cycle.
//
// It returns the number of devices fully enriched. The error is non-nil only
// when the device listing fails or when ctx is cancelled part-way through the
// cycle; in the latter case the count reflects the work completed so far.
func (a *Agent) RunOnce(ctx context.Context) (int, error) {
	devices, err := a.nbClient.ListDevicesWithStatus(ctx, string(inventory.StatusNeedsResearch))
	if err != nil {
		return 0, fmt.Errorf("research agent: list needs_research devices: %w", err)
	}

	enriched := 0
	for _, dev := range devices {
		// Stop promptly on shutdown rather than issuing calls that are bound
		// to fail for every remaining device.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return enriched, fmt.Errorf("research agent: cycle interrupted: %w", ctxErr)
		}

		query := SanitizeQuery(dev.Name)
		if query == "" {
			log.Printf("research agent: device %d has empty name after sanitization, skipping", dev.ID)
			continue
		}

		results, err := a.researchClient.Search(ctx, query)
		if err != nil {
			log.Printf("research agent: search error for device %d (%q): %v", dev.ID, dev.Name, err)
			continue
		}
		if len(results) == 0 {
			log.Printf("research agent: no SearXNG results for device %d (%q), skipping", dev.ID, dev.Name)
			continue
		}

		// Build the enrichment prompt from at most the top 3 results.
		top := results
		if len(top) > 3 {
			top = top[:3]
		}
		var sb strings.Builder
		for i, r := range top {
			fmt.Fprintf(&sb, "%d. %s\n %s\n URL: %s\n", i+1, r.Title, r.Snippet, r.URL)
		}
		prompt := fmt.Sprintf(
			"You are enriching a hardware inventory record.\nItem: %s\nSearch results:\n%s\nReturn JSON: {\"ai_notes\": \"...\", \"product_url\": \"...\"}",
			dev.Name, sb.String(),
		)

		rawResponse, err := a.llm.TextComplete(ctx, prompt)
		if err != nil {
			log.Printf("research agent: LLM error for device %d (%q): %v", dev.ID, dev.Name, err)
			continue
		}

		// Decode the reply. Models often wrap the JSON object in Markdown
		// fences or prose, so an embedded object is tried before falling back
		// to storing the raw text as the notes.
		var enrichResp enrichmentResponse
		if parseErr := json.Unmarshal([]byte(rawResponse), &enrichResp); parseErr != nil {
			if embedded, ok := extractEmbeddedEnrichment(rawResponse); ok {
				enrichResp = embedded
			} else {
				log.Printf("research agent: LLM non-JSON for device %d: %v (raw: %.100s)", dev.ID, parseErr, rawResponse)
				enrichResp.AINotes = rawResponse
			}
		}

		// results is known to be non-empty here, so the first hit is always
		// available as a fallback when the LLM supplied no URL.
		if enrichResp.ProductURL == "" {
			enrichResp.ProductURL = results[0].URL
		}

		// Only send the fields that actually carry a value.
		patch := map[string]interface{}{}
		if enrichResp.AINotes != "" {
			patch["ai_notes"] = enrichResp.AINotes
		}
		if enrichResp.ProductURL != "" {
			patch["product_url"] = enrichResp.ProductURL
		}
		if len(patch) > 0 {
			if patchErr := a.nbClient.PatchCustomFields(ctx, int64(dev.ID), patch); patchErr != nil {
				log.Printf("research agent: patch error for device %d: %v", dev.ID, patchErr)
				continue
			}
		}

		// Transition catalog status: needs_research -> researched.
		if _, transErr := a.updater.UpdateCatalogStatus(ctx, int64(dev.ID),
			inventory.StatusNeedsResearch, inventory.StatusResearched); transErr != nil {
			log.Printf("research agent: status transition error for device %d: %v", dev.ID, transErr)
			continue
		}
		enriched++
	}
	return enriched, nil
}

// extractEmbeddedEnrichment looks for a JSON object inside raw (the text from
// the first '{' to the last '}') and decodes it. It reports false when no such
// span exists, when the span is not valid JSON, or when the decoded object
// carries neither notes nor a URL.
func extractEmbeddedEnrichment(raw string) (enrichmentResponse, bool) {
	start := strings.IndexByte(raw, '{')
	end := strings.LastIndexByte(raw, '}')
	if start < 0 || end <= start {
		return enrichmentResponse{}, false
	}
	var resp enrichmentResponse
	if err := json.Unmarshal([]byte(raw[start:end+1]), &resp); err != nil {
		return enrichmentResponse{}, false
	}
	if resp.AINotes == "" && resp.ProductURL == "" {
		return enrichmentResponse{}, false
	}
	return resp, true
}
// Start runs the research agent until ctx is cancelled. RunOnce is called
// immediately on start and then once per interval tick. Cycle errors are
// logged and never stop the loop.
//
// A non-positive interval would make time.NewTicker panic, so in that case the
// initial cycle still runs, periodic cycles are disabled, and Start simply
// blocks until ctx is cancelled.
func (a *Agent) Start(ctx context.Context, interval time.Duration) {
	log.Printf("research agent: starting, interval=%v", interval)

	// runCycle executes one cycle and logs its outcome; label distinguishes
	// the startup cycle from the periodic ones in the error message.
	runCycle := func(label string) {
		n, err := a.RunOnce(ctx)
		if err != nil {
			log.Printf("research agent: %s error: %v", label, err)
			return
		}
		log.Printf("research agent: cycle complete, enriched %d items", n)
	}

	// Run immediately on startup.
	runCycle("initial cycle")

	if interval <= 0 {
		log.Printf("research agent: non-positive interval %v, periodic cycles disabled", interval)
		<-ctx.Done()
		log.Printf("research agent: shutting down")
		return
	}

	ticker := time.NewTicker(interval)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			log.Printf("research agent: shutting down")
			return
		case <-ticker.C:
			runCycle("cycle")
		}
	}
}