package research import ( "context" "encoding/json" "fmt" "log" "regexp" "strings" "time" "git.georgsen.dk/hwlab/internal/ai" "git.georgsen.dk/hwlab/internal/inventory" "git.georgsen.dk/hwlab/internal/netbox" ) // nonSafeChars matches characters that are not safe to send to SearXNG. // Allowed: alphanumeric, space, dot, dash, underscore. var nonSafeChars = regexp.MustCompile(`[^a-zA-Z0-9 .\-_]+`) // SanitizeQuery strips unsafe characters from a search query string. // Exported so it can be tested from the _test package. func SanitizeQuery(s string) string { sanitized := nonSafeChars.ReplaceAllString(s, " ") return strings.TrimSpace(sanitized) } // NetBoxer is the subset of netbox.Client used by the Agent. // Using an interface allows stub injection in tests. type NetBoxer interface { ListDevicesWithStatus(ctx context.Context, status string) ([]netbox.Device, error) PatchCustomFields(ctx context.Context, deviceID int64, patch map[string]interface{}) error } // TextCompleter is the subset of ai.TierClient used by the Agent for text-only LLM calls. type TextCompleter interface { TextComplete(ctx context.Context, prompt string) (string, error) } // CatalogTransitioner is the subset of inventory.CatalogUpdater used by the Agent. type CatalogTransitioner interface { UpdateCatalogStatus(ctx context.Context, deviceID int64, current, next inventory.CatalogStatus) (inventory.CatalogStatus, error) } // Agent is the background worker that enriches needs_research hardware items. // It polls NetBox, searches SearXNG, calls a Tier 2 LLM, and transitions items to researched. type Agent struct { nbClient NetBoxer researchClient ai.ResearchClient llm TextCompleter updater CatalogTransitioner } // NewAgent creates an Agent. All arguments must be non-nil. func NewAgent(nb NetBoxer, rc ai.ResearchClient, llm TextCompleter, updater CatalogTransitioner) *Agent { return &Agent{ nbClient: nb, researchClient: rc, llm: llm, updater: updater, } } // enrichmentResponse is the expected JSON structure from the Tier 2 LLM. type enrichmentResponse struct { AINotes string `json:"ai_notes"` ProductURL string `json:"product_url"` } // RunOnce performs a single research cycle: finds all needs_research devices, // enriches each via SearXNG + LLM, patches NetBox custom fields, and transitions // the catalog status to researched. Returns the number of items enriched. func (a *Agent) RunOnce(ctx context.Context) (int, error) { devices, err := a.nbClient.ListDevicesWithStatus(ctx, string(inventory.StatusNeedsResearch)) if err != nil { return 0, fmt.Errorf("research agent: list needs_research devices: %w", err) } enriched := 0 for _, dev := range devices { query := SanitizeQuery(dev.Name) if query == "" { log.Printf("research agent: device %d has empty name after sanitization, skipping", dev.ID) continue } results, err := a.researchClient.Search(ctx, query) if err != nil { log.Printf("research agent: search error for device %d (%q): %v", dev.ID, dev.Name, err) continue } if len(results) == 0 { log.Printf("research agent: no SearXNG results for device %d (%q), skipping", dev.ID, dev.Name) continue } // Build enrichment prompt using top 3 results top := results if len(top) > 3 { top = top[:3] } var sb strings.Builder for i, r := range top { sb.WriteString(fmt.Sprintf("%d. %s\n %s\n URL: %s\n", i+1, r.Title, r.Snippet, r.URL)) } prompt := fmt.Sprintf( "You are enriching a hardware inventory record.\nItem: %s\nSearch results:\n%s\nReturn JSON: {\"ai_notes\": \"...\", \"product_url\": \"...\"}", dev.Name, sb.String(), ) rawResponse, err := a.llm.TextComplete(ctx, prompt) if err != nil { log.Printf("research agent: LLM error for device %d (%q): %v", dev.ID, dev.Name, err) continue } // Parse LLM JSON response — extract ai_notes and product_url var enrichResp enrichmentResponse if parseErr := json.Unmarshal([]byte(rawResponse), &enrichResp); parseErr != nil { log.Printf("research agent: LLM non-JSON for device %d: %v (raw: %.100s)", dev.ID, parseErr, rawResponse) // Use raw response as ai_notes fallback enrichResp.AINotes = rawResponse } // Patch NetBox custom fields with enrichment data patch := map[string]interface{}{} if enrichResp.AINotes != "" { patch["ai_notes"] = enrichResp.AINotes } if enrichResp.ProductURL == "" && len(results) > 0 { // Fall back to first SearXNG result URL if LLM didn't provide one enrichResp.ProductURL = results[0].URL } if enrichResp.ProductURL != "" { patch["product_url"] = enrichResp.ProductURL } if len(patch) > 0 { if patchErr := a.nbClient.PatchCustomFields(ctx, int64(dev.ID), patch); patchErr != nil { log.Printf("research agent: patch error for device %d: %v", dev.ID, patchErr) continue } } // Transition catalog status: needs_research -> researched if _, transErr := a.updater.UpdateCatalogStatus(ctx, int64(dev.ID), inventory.StatusNeedsResearch, inventory.StatusResearched); transErr != nil { log.Printf("research agent: status transition error for device %d: %v", dev.ID, transErr) continue } enriched++ } return enriched, nil } // Start runs the research agent on the given interval until ctx is cancelled. // RunOnce is called immediately on start, then on each tick. func (a *Agent) Start(ctx context.Context, interval time.Duration) { log.Printf("research agent: starting, interval=%v", interval) // Run immediately on startup if n, err := a.RunOnce(ctx); err != nil { log.Printf("research agent: initial cycle error: %v", err) } else { log.Printf("research agent: cycle complete, enriched %d items", n) } ticker := time.NewTicker(interval) defer ticker.Stop() for { select { case <-ctx.Done(): log.Printf("research agent: shutting down") return case <-ticker.C: if n, err := a.RunOnce(ctx); err != nil { log.Printf("research agent: cycle error: %v", err) } else { log.Printf("research agent: cycle complete, enriched %d items", n) } } } }