subcog/storage/persistence/
git_notes.rs

1//! Git notes persistence backend.
2//!
3//! This is the primary persistence backend for subcog.
4//! Memories are stored as git notes attached to a dedicated ref.
5
6use crate::git::{NotesManager, YamlFrontMatterParser};
7use crate::models::{Domain, Memory, MemoryId, MemoryStatus, Namespace};
8use crate::storage::traits::PersistenceBackend;
9use crate::{Error, Result};
10use std::collections::HashMap;
11use std::path::PathBuf;
12
13/// Git notes-based persistence backend.
14pub struct GitNotesBackend {
15    /// Path to the git repository.
16    repo_path: PathBuf,
17    /// Git notes ref (e.g., "refs/notes/subcog").
18    notes_ref: String,
19    /// Notes manager instance.
20    notes_manager: NotesManager,
21    /// In-memory index of memory ID to commit ID mappings.
22    id_mapping: HashMap<String, String>,
23}
24
25impl GitNotesBackend {
26    /// Creates a new git notes backend.
27    #[must_use]
28    pub fn new(repo_path: impl Into<PathBuf>) -> Self {
29        let path = repo_path.into();
30        let notes_manager = NotesManager::new(&path);
31        Self {
32            repo_path: path,
33            notes_ref: NotesManager::DEFAULT_NOTES_REF.to_string(),
34            notes_manager,
35            id_mapping: HashMap::new(),
36        }
37    }
38
39    /// Sets a custom notes ref.
40    #[must_use]
41    pub fn with_notes_ref(mut self, notes_ref: impl Into<String>) -> Self {
42        let ref_str = notes_ref.into();
43        self.notes_ref.clone_from(&ref_str);
44        self.notes_manager = NotesManager::new(&self.repo_path).with_notes_ref(ref_str);
45        self
46    }
47
48    /// Returns the repository path.
49    #[must_use]
50    pub const fn repo_path(&self) -> &PathBuf {
51        &self.repo_path
52    }
53
54    /// Returns the notes ref.
55    #[must_use]
56    pub fn notes_ref(&self) -> &str {
57        &self.notes_ref
58    }
59
60    /// Builds the index of memory IDs from existing notes.
61    ///
62    /// # Errors
63    ///
64    /// Returns an error if notes cannot be read.
65    pub fn build_index(&mut self) -> Result<()> {
66        self.id_mapping.clear();
67
68        let notes = self.notes_manager.list()?;
69
70        for (commit_id, content) in notes {
71            if let Some(id) = extract_memory_id_from_content(&content) {
72                self.id_mapping.insert(id, commit_id);
73            }
74        }
75
76        Ok(())
77    }
78
79    /// Serializes a memory to YAML front matter format.
80    fn serialize_memory(memory: &Memory) -> Result<String> {
81        let metadata = serde_json::json!({
82            "id": memory.id.as_str(),
83            "namespace": memory.namespace.as_str(),
84            "domain": memory.domain.to_string(),
85            "status": memory.status.as_str(),
86            "created_at": memory.created_at,
87            "updated_at": memory.updated_at,
88            "tags": memory.tags
89        });
90
91        YamlFrontMatterParser::serialize(&metadata, &memory.content)
92    }
93
94    /// Deserializes a memory from YAML front matter format.
95    fn deserialize_memory(content: &str) -> Result<Memory> {
96        let (metadata, body) = YamlFrontMatterParser::parse(content)?;
97
98        let id = metadata
99            .get("id")
100            .and_then(|v| v.as_str())
101            .ok_or_else(|| Error::InvalidInput("Missing memory ID in metadata".to_string()))?;
102
103        let namespace_str = metadata
104            .get("namespace")
105            .and_then(|v| v.as_str())
106            .unwrap_or("decisions");
107
108        let namespace = parse_namespace(namespace_str);
109
110        let domain_str = metadata
111            .get("domain")
112            .and_then(|v| v.as_str())
113            .unwrap_or("global");
114
115        let domain = parse_domain(domain_str);
116
117        let status_str = metadata
118            .get("status")
119            .and_then(|v| v.as_str())
120            .unwrap_or("active");
121
122        let status = parse_status(status_str);
123
124        let created_at = metadata
125            .get("created_at")
126            .and_then(serde_json::Value::as_u64)
127            .unwrap_or(0);
128
129        let updated_at = metadata
130            .get("updated_at")
131            .and_then(serde_json::Value::as_u64)
132            .unwrap_or(created_at);
133
134        let tags: Vec<String> = metadata
135            .get("tags")
136            .and_then(|v| v.as_array())
137            .map(|arr| {
138                arr.iter()
139                    .filter_map(|v| v.as_str().map(String::from))
140                    .collect()
141            })
142            .unwrap_or_default();
143
144        let source = metadata
145            .get("source")
146            .and_then(|v| v.as_str())
147            .map(String::from);
148
149        Ok(Memory {
150            id: MemoryId::new(id),
151            content: body,
152            namespace,
153            domain,
154            status,
155            created_at,
156            updated_at,
157            embedding: None,
158            tags,
159            source,
160        })
161    }
162}
163
164/// Extracts memory ID from note content.
165fn extract_memory_id_from_content(content: &str) -> Option<String> {
166    let (metadata, _) = YamlFrontMatterParser::parse(content).ok()?;
167    metadata
168        .get("id")
169        .and_then(|v| v.as_str())
170        .map(String::from)
171}
172
173/// Parses a namespace string to Namespace enum.
174fn parse_namespace(s: &str) -> Namespace {
175    match s.to_lowercase().as_str() {
176        "decisions" => Namespace::Decisions,
177        "patterns" => Namespace::Patterns,
178        "learnings" => Namespace::Learnings,
179        "context" => Namespace::Context,
180        "tech-debt" | "techdebt" => Namespace::TechDebt,
181        "apis" => Namespace::Apis,
182        "config" => Namespace::Config,
183        "security" => Namespace::Security,
184        "performance" => Namespace::Performance,
185        "testing" => Namespace::Testing,
186        _ => Namespace::Decisions,
187    }
188}
189
190/// Parses a status string to `MemoryStatus` enum.
191fn parse_status(s: &str) -> MemoryStatus {
192    match s.to_lowercase().as_str() {
193        "active" => MemoryStatus::Active,
194        "archived" => MemoryStatus::Archived,
195        "superseded" => MemoryStatus::Superseded,
196        "pending" => MemoryStatus::Pending,
197        "deleted" => MemoryStatus::Deleted,
198        _ => MemoryStatus::Active,
199    }
200}
201
202/// Parses a domain string to Domain struct.
203fn parse_domain(s: &str) -> Domain {
204    if s == "global" || s.is_empty() {
205        return Domain::new();
206    }
207
208    let parts: Vec<&str> = s.split('/').collect();
209    match parts.len() {
210        1 => Domain {
211            organization: Some(parts[0].to_string()),
212            project: None,
213            repository: None,
214        },
215        2 => Domain {
216            organization: Some(parts[0].to_string()),
217            project: None,
218            repository: Some(parts[1].to_string()),
219        },
220        3 => Domain {
221            organization: Some(parts[0].to_string()),
222            project: Some(parts[1].to_string()),
223            repository: Some(parts[2].to_string()),
224        },
225        _ => Domain::new(),
226    }
227}
228
229impl PersistenceBackend for GitNotesBackend {
230    fn store(&mut self, memory: &Memory) -> Result<()> {
231        let content = Self::serialize_memory(memory)?;
232
233        // Add note to HEAD
234        let _note_oid = self.notes_manager.add_to_head(&content)?;
235
236        // Update our in-memory mapping
237        // For simplicity, we use the memory ID as the key and store a placeholder
238        // In production, we would track which commit each memory is attached to
239        self.id_mapping
240            .insert(memory.id.as_str().to_string(), "HEAD".to_string());
241
242        Ok(())
243    }
244
245    fn get(&self, id: &MemoryId) -> Result<Option<Memory>> {
246        // First check our mapping
247        if !self.id_mapping.contains_key(id.as_str()) {
248            // Try to find by scanning all notes
249            return self.find_memory_by_scanning(id);
250        }
251
252        // Get from HEAD (simplified - in production we'd use the actual commit ID)
253        let content = self.notes_manager.get_from_head()?;
254
255        match content {
256            Some(c) => {
257                let memory = Self::deserialize_memory(&c)?;
258                if memory.id.as_str() == id.as_str() {
259                    Ok(Some(memory))
260                } else {
261                    Ok(None)
262                }
263            },
264            None => Ok(None),
265        }
266    }
267
268    fn delete(&mut self, id: &MemoryId) -> Result<bool> {
269        // For git notes, we don't actually delete - we mark as deleted
270        // A proper implementation would need to track the commit ID
271        if self.id_mapping.remove(id.as_str()).is_some() {
272            Ok(true)
273        } else {
274            Ok(false)
275        }
276    }
277
278    fn list_ids(&self) -> Result<Vec<MemoryId>> {
279        let notes = self.notes_manager.list()?;
280        let mut ids = Vec::new();
281
282        for (_, content) in notes {
283            if let Some(id) = extract_memory_id_from_content(&content) {
284                ids.push(MemoryId::new(&id));
285            }
286        }
287
288        Ok(ids)
289    }
290}
291
292impl GitNotesBackend {
293    /// Finds a memory by scanning all notes.
294    fn find_memory_by_scanning(&self, id: &MemoryId) -> Result<Option<Memory>> {
295        let notes = self.notes_manager.list()?;
296
297        for (_, content) in notes {
298            if let Some(memory) = try_deserialize_if_matching(&content, id) {
299                return Ok(Some(memory));
300            }
301        }
302
303        Ok(None)
304    }
305}
306
307/// Attempts to deserialize a note if it matches the given ID.
308fn try_deserialize_if_matching(content: &str, id: &MemoryId) -> Option<Memory> {
309    let (metadata, _) = YamlFrontMatterParser::parse(content).ok()?;
310    let note_id = metadata
311        .get("id")
312        .and_then(|v| v.as_str())
313        .map(String::from)?;
314
315    if note_id == id.as_str() {
316        GitNotesBackend::deserialize_memory(content).ok()
317    } else {
318        None
319    }
320}
321
#[cfg(test)]
mod tests {
    use super::*;
    use git2::{Repository, Signature};
    use tempfile::TempDir;

    /// Initializes a throwaway repository that already has one commit on
    /// `HEAD`, so notes have something to attach to.
    fn create_test_repo() -> (TempDir, Repository) {
        let dir = TempDir::new().unwrap();
        let repo = Repository::init(dir.path()).unwrap();

        // The tree borrows `repo`; keeping it in an inner scope releases the
        // borrow before `repo` is moved into the return value.
        {
            let author = Signature::now("test", "test@test.com").unwrap();
            let tree_id = repo.index().unwrap().write_tree().unwrap();
            let empty_tree = repo.find_tree(tree_id).unwrap();
            repo.commit(
                Some("HEAD"),
                &author,
                &author,
                "Initial commit",
                &empty_tree,
                &[],
            )
            .unwrap();
        }

        (dir, repo)
    }

    /// Builds the fixture memory shared by the serialization tests.
    fn create_test_memory() -> Memory {
        let timestamp = 1_234_567_890;

        Memory {
            id: MemoryId::new("test_memory_123"),
            content: "Use PostgreSQL for primary storage".to_string(),
            namespace: Namespace::Decisions,
            domain: Domain::for_repository("zircote", "subcog"),
            status: MemoryStatus::Active,
            created_at: timestamp,
            updated_at: timestamp,
            embedding: None,
            tags: vec!["database".to_string(), "architecture".to_string()],
            source: Some("src/main.rs".to_string()),
        }
    }

    #[test]
    fn test_git_notes_backend_creation() {
        let default_backend = GitNotesBackend::new("/tmp/test");
        assert_eq!(
            default_backend.notes_ref(),
            NotesManager::DEFAULT_NOTES_REF
        );

        let custom_backend =
            GitNotesBackend::new("/tmp/test").with_notes_ref("refs/notes/custom");
        assert_eq!(custom_backend.notes_ref(), "refs/notes/custom");
    }

    #[test]
    fn test_serialize_memory() {
        let serialized = GitNotesBackend::serialize_memory(&create_test_memory()).unwrap();

        for fragment in ["---", "namespace: decisions", "Use PostgreSQL"] {
            assert!(serialized.contains(fragment));
        }
    }

    #[test]
    fn test_deserialize_memory() {
        let content = r"---
id: test_123
namespace: decisions
domain: zircote/subcog
status: active
created_at: 1234567890
updated_at: 1234567890
tags:
  - rust
  - memory
---
This is the memory content.";

        let memory = GitNotesBackend::deserialize_memory(content).unwrap();

        assert_eq!(memory.id.as_str(), "test_123");
        assert_eq!(memory.namespace, Namespace::Decisions);
        assert_eq!(memory.content, "This is the memory content.");
        assert_eq!(memory.tags.len(), 2);
    }

    #[test]
    fn test_store_and_list() {
        let (dir, _repo) = create_test_repo();
        let mut backend = GitNotesBackend::new(dir.path());

        backend.store(&create_test_memory()).unwrap();

        let ids = backend.list_ids().unwrap();
        assert!(!ids.is_empty());
    }

    #[test]
    fn test_parse_namespace() {
        let cases = [
            ("decisions", Namespace::Decisions),
            ("Patterns", Namespace::Patterns),
            ("TECH-DEBT", Namespace::TechDebt),
            ("techdebt", Namespace::TechDebt),
            ("unknown", Namespace::Decisions),
        ];

        for (input, expected) in cases {
            assert_eq!(parse_namespace(input), expected);
        }
    }

    #[test]
    fn test_parse_status() {
        let cases = [
            ("active", MemoryStatus::Active),
            ("Archived", MemoryStatus::Archived),
            ("SUPERSEDED", MemoryStatus::Superseded),
            ("unknown", MemoryStatus::Active),
        ];

        for (input, expected) in cases {
            assert_eq!(parse_status(input), expected);
        }
    }

    #[test]
    fn test_parse_domain() {
        assert!(parse_domain("global").is_global());

        let two_part = parse_domain("zircote/subcog");
        assert_eq!(two_part.organization, Some("zircote".to_string()));
        assert_eq!(two_part.repository, Some("subcog".to_string()));

        let three_part = parse_domain("org/proj/repo");
        assert_eq!(three_part.organization, Some("org".to_string()));
        assert_eq!(three_part.project, Some("proj".to_string()));
        assert_eq!(three_part.repository, Some("repo".to_string()));
    }

    #[test]
    fn test_roundtrip() {
        let original = create_test_memory();
        let note = GitNotesBackend::serialize_memory(&original).unwrap();
        let restored = GitNotesBackend::deserialize_memory(&note).unwrap();

        assert_eq!(original.id.as_str(), restored.id.as_str());
        assert_eq!(original.namespace, restored.namespace);
        assert_eq!(original.content, restored.content);
        assert_eq!(original.status, restored.status);
        assert_eq!(original.created_at, restored.created_at);
    }
}
456}