subcog/hooks/search_intent/
keyword.rs

//! Keyword-based search intent detection.
//!
//! This module provides fast, pattern-based intent detection using regex signals.
//! Detection typically completes in under 10ms.
5
6use super::types::{DetectionSource, SearchIntent, SearchIntentType};
7use crate::hooks::search_patterns::{SEARCH_SIGNALS, STOP_WORDS, SearchSignal};
8
9/// Detects search intent from a user prompt using keyword pattern matching.
10///
11/// Analyzes the prompt for search signals (e.g., "how do I", "where is")
12/// and extracts intent type, confidence, keywords, and topics.
13///
14/// # Arguments
15///
16/// * `prompt` - The user prompt to analyze.
17///
18/// # Returns
19///
20/// A `SearchIntent` if search signals are detected, `None` otherwise.
21///
22/// # Performance
23///
24/// Typically completes in under 10ms.
25#[must_use]
26pub fn detect_search_intent(prompt: &str) -> Option<SearchIntent> {
27    if prompt.is_empty() {
28        return None;
29    }
30
31    let prompt_lower = prompt.to_lowercase();
32    let mut matched_signals: Vec<(&SearchSignal, String)> = Vec::new();
33
34    // Check each signal pattern
35    for signal in SEARCH_SIGNALS.iter() {
36        if let Some(matched) = signal.pattern.find(&prompt_lower) {
37            matched_signals.push((signal, matched.as_str().to_string()));
38        }
39    }
40
41    if matched_signals.is_empty() {
42        return None;
43    }
44
45    // Determine primary intent type by counting matches
46    let intent_type = determine_primary_intent(&matched_signals);
47
48    // Calculate confidence before consuming matched_signals
49    let confidence = calculate_confidence(&matched_signals, prompt);
50
51    // Extract keywords that triggered detection - consume matched_signals to avoid clones
52    let keywords: Vec<String> = matched_signals.into_iter().map(|(_, m)| m).collect();
53
54    // Extract topics from the prompt
55    let topics = extract_topics(prompt);
56
57    Some(SearchIntent {
58        intent_type,
59        confidence,
60        keywords,
61        topics,
62        source: DetectionSource::Keyword,
63    })
64}
65
66/// Determines the primary intent type from matched signals.
67fn determine_primary_intent(matched_signals: &[(&SearchSignal, String)]) -> SearchIntentType {
68    use std::collections::HashMap;
69
70    let mut intent_counts: HashMap<SearchIntentType, usize> = HashMap::new();
71
72    for (signal, _) in matched_signals {
73        *intent_counts.entry(signal.intent_type).or_insert(0) += 1;
74    }
75
76    // Prioritize more specific intents over General
77    let priority_order = [
78        SearchIntentType::HowTo,
79        SearchIntentType::Troubleshoot,
80        SearchIntentType::Location,
81        SearchIntentType::Explanation,
82        SearchIntentType::Comparison,
83        SearchIntentType::General,
84    ];
85
86    for intent in priority_order {
87        if intent_counts.contains_key(&intent) {
88            return intent;
89        }
90    }
91
92    SearchIntentType::General
93}
94
95/// Calculates confidence score based on matched signals and prompt characteristics.
96#[allow(clippy::cast_precision_loss)]
97fn calculate_confidence(matched_signals: &[(&SearchSignal, String)], prompt: &str) -> f32 {
98    let base_confidence: f32 = 0.5;
99
100    // Bonus for multiple matches (max +0.15)
101    let match_bonus = 0.15_f32.min(matched_signals.len() as f32 * 0.05);
102
103    // Bonus for longer prompts (more context)
104    let length_factor = if prompt.len() > 50 { 0.1 } else { 0.0 };
105
106    // Bonus for multiple sentences (more structured query)
107    let sentence_count = prompt
108        .chars()
109        .filter(|&c| c == '.' || c == '?' || c == '!')
110        .count();
111    let sentence_factor = if sentence_count > 1 { 0.1 } else { 0.0 };
112
113    // Bonus for question marks (explicit question)
114    let question_factor = if prompt.contains('?') { 0.1 } else { 0.0 };
115
116    (base_confidence + match_bonus + length_factor + sentence_factor + question_factor).min(0.95)
117}
118
119/// Extracts topics from a prompt.
120///
121/// Topics are significant words that might map to memory tags or namespaces.
122///
123/// # Performance
124///
125/// Uses linear deduplication via `Vec::contains()` instead of `HashSet` since
126/// we limit to 5 topics. This avoids allocating a separate `HashSet` and cloning
127/// strings for both collections.
128pub fn extract_topics(prompt: &str) -> Vec<String> {
129    let mut topics = Vec::with_capacity(5);
130
131    // Simple word tokenization and filtering - iterate directly without collecting
132    for word in prompt.split(|c: char| c.is_whitespace() || c == ',' || c == ';' || c == ':') {
133        if word.is_empty() {
134            continue;
135        }
136
137        // Clean up the word
138        let cleaned = word
139            .trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_')
140            .to_lowercase();
141
142        // Filter criteria
143        if cleaned.len() < 3 {
144            continue;
145        }
146        if STOP_WORDS.contains(cleaned.as_str()) {
147            continue;
148        }
149        // Skip pure numbers
150        if cleaned.chars().all(char::is_numeric) {
151            continue;
152        }
153        // Deduplicate using linear search (O(n) but n <= 5)
154        if topics.contains(&cleaned) {
155            continue;
156        }
157
158        topics.push(cleaned);
159
160        // Early exit once we have 5 topics
161        if topics.len() >= 5 {
162            break;
163        }
164    }
165
166    topics
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs detection on `prompt` and returns the detected intent, failing the
    /// calling test when no intent is found.
    fn detected(prompt: &str) -> SearchIntent {
        let outcome = detect_search_intent(prompt);
        assert!(outcome.is_some());
        outcome.unwrap()
    }

    #[test]
    fn test_detect_howto_intent() {
        let intent = detected("How do I implement authentication?");
        assert_eq!(intent.intent_type, SearchIntentType::HowTo);
        assert!(intent.confidence >= 0.5);
    }

    #[test]
    fn test_detect_troubleshoot_intent() {
        let intent = detected("Why am I getting an error in the database?");
        assert_eq!(intent.intent_type, SearchIntentType::Troubleshoot);
    }

    #[test]
    fn test_detect_location_intent() {
        let intent = detected("Where is the configuration file?");
        assert_eq!(intent.intent_type, SearchIntentType::Location);
    }

    #[test]
    fn test_detect_explanation_intent() {
        let intent = detected("What is the ServiceContainer?");
        assert_eq!(intent.intent_type, SearchIntentType::Explanation);
    }

    #[test]
    fn test_detect_comparison_intent() {
        let intent = detected("What's the difference between SQLite and PostgreSQL?");
        assert_eq!(intent.intent_type, SearchIntentType::Comparison);
    }

    #[test]
    fn test_no_intent_detected() {
        assert!(detect_search_intent("Hello, world!").is_none());
    }

    #[test]
    fn test_empty_prompt() {
        assert!(detect_search_intent("").is_none());
    }

    #[test]
    fn test_extract_topics() {
        let topics = extract_topics("How do I implement authentication with OAuth?");

        for expected in ["implement", "authentication", "oauth"] {
            assert!(topics.iter().any(|topic| topic.as_str() == expected));
        }

        // "how", "do", "I", "with" should be filtered out
        for filtered in ["how", "with"] {
            assert!(!topics.iter().any(|topic| topic.as_str() == filtered));
        }
    }

    #[test]
    fn test_topics_limit() {
        let many_words = "one two three four five six seven eight nine ten eleven twelve";
        assert!(extract_topics(many_words).len() <= 5);
    }

    #[test]
    fn test_confidence_increases_with_question_mark() {
        let with_question = detected("How do I test this?").confidence;
        let without_question = detected("How do I test this").confidence;
        assert!(with_question > without_question);
    }

    #[test]
    fn test_confidence_capped_at_95() {
        // Multiple signals should not exceed 0.95
        let intent =
            detected("How do I fix this error? Where is the problem? What is the solution?");
        assert!(intent.confidence <= 0.95);
    }
}
260}