diff --git a/cmd/sea/nginx.conf.tmpl b/cmd/sea/nginx.conf.tmpl
index 1dff205..a32e4de 100644
--- a/cmd/sea/nginx.conf.tmpl
+++ b/cmd/sea/nginx.conf.tmpl
@@ -7,10 +7,25 @@ map $arg_q $dest {
     ~*(?i)\b(near\s+me|directions?\s+to|map\s+of)\b google_maps;
     # Wikipedia lookups
     ~*(?i)\b(?:biography|history|life\s+of)\b wikipedia;
-    # question detection
-    ~*(?i)\b(who|what|when|where|why|how|can|could|would|should|do|did|is|are|was|were|am|will|whom|whose|which)\b chatgpt;
-    # instructional queries
-    ~*(?i)\b(explain|describe|compare|define)\b chatgpt;
+    # ChatGPT queries
+    ~*(?i)^(add|allow|appear|ask|begin|believe|bring|build|buy|call)\b chatgpt;
+    ~*(?i)^(change|come|consider|create|cut|do|fall|feel|find)\b chatgpt;
+    ~*(?i)^(follow|forgive|generate|get|give|go|grow|have|hear|help)\b chatgpt;
+    ~*(?i)^(include|keep|kill|know|learn|leave|let|like|live|look)\b chatgpt;
+    ~*(?i)^(lose|love|make|mean|meet|move|offer|open|pay|play)\b chatgpt;
+    ~*(?i)^(provide|put|read|reach|remain|remember|send|serve|set)\b chatgpt;
+    ~*(?i)^(show|sit|speak|spend|stand|start|stay|stop|succeed|take)\b chatgpt;
+    ~*(?i)^(talk|tell|think|try|turn|understand|use|watch|work)\b chatgpt;
+    ~*(?i)^(write|what|how|why|when|where|who|which|whose|will)\b chatgpt;
+    ~*(?i)^(would|can|could|should|might|must|may|does|did|are|is)\b chatgpt;
+    ~*(?i)^(was|were|am|i|we|team|explain|list|compare|contrast)\b chatgpt;
+    ~*(?i)^(summarize|translate|define|describe|recommend|analyze)\b chatgpt;
+    ~*(?i)^(evaluate|outline|plan|design|develop|propose|edit)\b chatgpt;
+    ~*(?i)^(improve|fix|optimize|assist|research|brainstorm|suggest)\b chatgpt;
+    ~*(?i)^(solve|calculate|interpret|categorize|classify|format)\b chatgpt;
+    ~*(?i)^(style|proofread|review|listen|plot|graph|chart|map)\b chatgpt;
+    ~*(?i)^(diagram|simulate|predict|forecast|estimate|answer|reply)\b chatgpt;
+    ~*(?i)^(respond|inquire|query|request|discuss)\b chatgpt;
     # explicit wiki keywords
     ~*(?i)\bwikipedia\b|\bwiki\b wikipedia;
     {{- range .CustomKeywords }}
diff --git a/cmd/sea/nginx_regex_test.go b/cmd/sea/nginx_regex_test.go
new file mode 100644
index 0000000..0517f45
--- /dev/null
+++ b/cmd/sea/nginx_regex_test.go
@@ -0,0 +1,102 @@
+package main
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+	"testing"
+)
+
+// extractChatGPTRules collects the ChatGPT rules found between the
+// "# ChatGPT queries" and "# explicit wiki keywords" markers of an nginx
+// config and merges them into one case-insensitive, start-anchored regexp.
+//
+// It returns that regexp as a one-element slice together with every keyword
+// taken from the rules. All three results are nil when the block holds no
+// rules. An error is returned when a line inside the block is not a
+// start-anchored ChatGPT rule or when the merged pattern does not compile.
+func extractChatGPTRules(conf string) ([]*regexp.Regexp, []string, error) {
+	// Matches e.g. `~*(?i)^(add|allow|ask)\b chatgpt;` and captures the
+	// parenthesised alternation. Backslashes are excluded from the capture so
+	// each alternative is a plain word the test can paste into a query.
+	re := regexp.MustCompile(`~\*\(\?i\)\^([^\\]+)\\b\s+chatgpt;`)
+	var patterns []string
+	inBlock := false
+	for _, l := range strings.Split(conf, "\n") {
+		if strings.Contains(l, "# ChatGPT queries") {
+			inBlock = true
+			continue
+		}
+		if strings.Contains(l, "# explicit wiki keywords") {
+			break
+		}
+		if !inBlock {
+			continue
+		}
+		l = strings.TrimSpace(l)
+		if l == "" || strings.HasPrefix(l, "#") {
+			continue
+		}
+		m := re.FindStringSubmatch(l)
+		if len(m) != 2 {
+			// Silently skipping this line would let an un-anchored rule pass
+			// the test unnoticed, so report it instead.
+			return nil, nil, fmt.Errorf("unexpected line in ChatGPT block: %q", l)
+		}
+		p := strings.TrimPrefix(strings.TrimSuffix(m[1], ")"), "(")
+		patterns = append(patterns, p)
+	}
+	if len(patterns) == 0 {
+		return nil, nil, nil
+	}
+	full := "(?i)^(" + strings.Join(patterns, "|") + ")\\b"
+	rx, err := regexp.Compile(full)
+	if err != nil {
+		return nil, nil, err
+	}
+	var words []string
+	for _, p := range patterns {
+		words = append(words, strings.Split(p, "|")...)
+	}
+	return []*regexp.Regexp{rx}, words, nil
+}
+
+// matchAny reports whether s matches at least one regexp in rs.
+func matchAny(rs []*regexp.Regexp, s string) bool {
+	for _, r := range rs {
+		if r.MatchString(s) {
+			return true
+		}
+	}
+	return false
+}
+
+// TestChatGPTRouteAnchoring checks that each ChatGPT keyword extracted from
+// the generated nginx config matches only as the first word of a query.
+func TestChatGPTRouteAnchoring(t *testing.T) {
+	cfg := Config{}
+	out, err := generateNginx(cfg)
+	if err != nil {
+		t.Fatalf("failed to generate nginx: %v", err)
+	}
+	regs, words, err := extractChatGPTRules(out)
+	if err != nil {
+		t.Fatalf("failed to extract regex: %v", err)
+	}
+	if len(regs) == 0 {
+		t.Fatal("no ChatGPT regex found in config")
+	}
+	for _, w := range words {
+		start := w + " test"
+		if !matchAny(regs, start) {
+			t.Errorf("word %q not matched at start", w)
+		}
+		notStart := "hello " + w + " there"
+		if matchAny(regs, notStart) {
+			t.Errorf("word %q matched when not at start", w)
+		}
+	}
+	if matchAny(regs, "completely unrelated") {
+		t.Error("unrelated query incorrectly matched")
+	}
+}
diff --git a/nginx.conf b/nginx.conf
index c34d937..769a845 100644
--- a/nginx.conf
+++ b/nginx.conf
@@ -7,10 +7,25 @@ map $arg_q $dest {
     ~*(?i)\b(near\s+me|directions?\s+to|map\s+of)\b google_maps;
     # Wikipedia lookups
     ~*(?i)\b(?:biography|history|life\s+of)\b wikipedia;
-    # question detection
-    ~*(?i)\b(who|what|when|where|why|how|can|could|would|should|do|did|is|are|was|were|am|will|whom|whose|which)\b chatgpt;
-    # instructional queries
-    ~*(?i)\b(explain|describe|compare|define)\b chatgpt;
+    # ChatGPT queries
+    ~*(?i)^(add|allow|appear|ask|begin|believe|bring|build|buy|call)\b chatgpt;
+    ~*(?i)^(change|come|consider|create|cut|do|fall|feel|find)\b chatgpt;
+    ~*(?i)^(follow|forgive|generate|get|give|go|grow|have|hear|help)\b chatgpt;
+    ~*(?i)^(include|keep|kill|know|learn|leave|let|like|live|look)\b chatgpt;
+    ~*(?i)^(lose|love|make|mean|meet|move|offer|open|pay|play)\b chatgpt;
+    ~*(?i)^(provide|put|read|reach|remain|remember|send|serve|set)\b chatgpt;
+    ~*(?i)^(show|sit|speak|spend|stand|start|stay|stop|succeed|take)\b chatgpt;
+    ~*(?i)^(talk|tell|think|try|turn|understand|use|watch|work)\b chatgpt;
+    ~*(?i)^(write|what|how|why|when|where|who|which|whose|will)\b chatgpt;
+    ~*(?i)^(would|can|could|should|might|must|may|does|did|are|is)\b chatgpt;
+    ~*(?i)^(was|were|am|i|we|team|explain|list|compare|contrast)\b chatgpt;
+    ~*(?i)^(summarize|translate|define|describe|recommend|analyze)\b chatgpt;
+    ~*(?i)^(evaluate|outline|plan|design|develop|propose|edit)\b chatgpt;
+    ~*(?i)^(improve|fix|optimize|assist|research|brainstorm|suggest)\b chatgpt;
+    ~*(?i)^(solve|calculate|interpret|categorize|classify|format)\b chatgpt;
+    ~*(?i)^(style|proofread|review|listen|plot|graph|chart|map)\b chatgpt;
+    ~*(?i)^(diagram|simulate|predict|forecast|estimate|answer|reply)\b chatgpt;
+    ~*(?i)^(respond|inquire|query|request|discuss)\b chatgpt;
     # explicit wiki keywords
     ~*(?i)\bwikipedia\b|\bwiki\b wikipedia;
     ~*(?i)^nginx$ wikipedia;