Coverage for src/kemi/models.py: 100%

84 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-06-05 15:47 +0000

1from dataclasses import dataclass, field 

2from datetime import datetime, timezone 

3from enum import Enum 

4from typing import Any 

5 

6 

class MemorySource(Enum):
    """Where a memory came from: the user, an agent's inference, or the system."""

    USER_STATED = "user_stated"
    AGENT_INFERRED = "agent_inferred"
    SYSTEM_GENERATED = "system_generated"

11 

12 

class LifecycleState(Enum):
    """Lifecycle stages a memory can be labelled with."""

    ACTIVE = "active"
    DECAYING = "decaying"
    ARCHIVED = "archived"
    DELETED = "deleted"

18 

19 

class MemoryType(Enum):
    """Type of memory: episodic (event-based), semantic (fact-based), or procedural (how-to)."""

    EPISODIC = "episodic"
    SEMANTIC = "semantic"
    PROCEDURAL = "procedural"

26 

27 

@dataclass
class MemoryObject:
    """A single stored memory and the bookkeeping fields attached to it.

    Only ``memory_id``, ``user_id`` and ``content`` are required; every other
    field has a default. Mutable defaults (``metadata``, ``tags``) are built
    per instance through ``default_factory`` so instances never share them.
    """

    # Required identity and payload.
    memory_id: str
    user_id: str
    content: str

    # Optional vector and score.
    embedding: list[float] | None = None
    score: float = 0.0

    # Both timestamps default to the current UTC time. Each factory is called
    # separately, so the two defaults may differ by a few microseconds.
    created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
    last_accessed_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    source: MemorySource = MemorySource.USER_STATED
    importance: float = 0.5
    lifecycle_state: LifecycleState = LifecycleState.ACTIVE
    metadata: dict[str, Any] = field(default_factory=dict)
    # Not derived from ``embedding`` here; stays None unless the caller sets it.
    embedding_dim: int | None = None
    tags: list[str] = field(default_factory=list)
    confidence: float = 1.0
    memory_type: MemoryType = MemoryType.EPISODIC

    # Optional scoping identifiers.
    session_id: str | None = None
    namespace: str = "default"
    version: int = 1
    agent_id: str | None = None
    run_id: str | None = None
    app_id: str | None = None

    # None means no expiry timestamp has been set.
    expires_at: datetime | None = None

52 

53 

54@dataclass 

55class MemoryConfig: 

56 dedup_threshold: float = 0.85 

57 conflict_threshold: float = 0.65 

58 decay_half_life_hours: float = 168.0 

59 decay_threshold_hours: float = 720.0 

60 default_importance: float = 0.5 

61 sanitize: bool = False 

62 default_top_k: int = 5 

63 max_tokens_default: int | None = None 

64 hybrid_search: bool = True 

65 hooks_raise_on_error: bool = True 

66 # Scoring weights for hybrid search 

67 weight_semantic: float = 0.6 

68 weight_recency: float = 0.25 

69 weight_bm25: float = 0.15 

70 # Summarizer configuration for LLM-powered consolidation 

71 summarizer_llm_provider: str | None = None 

72 summarizer_llm_model: str | None = None 

73 summarizer_prompt_template: str | None = None 

74 # Scoring weights for non-hybrid search (when query_embedding is empty or hybrid_search=False) 

75 weight_semantic_no_embed: float = 0.5 

76 weight_recency_no_embed: float = 0.3 

77 weight_importance: float = 0.2 

78 # Entity-aware retrieval 

79 enable_entity_boost: bool = False 

80 entity_boost_weight: float = 0.1 

81 

82 def __post_init__(self) -> None: 

83 if not 0.0 <= self.dedup_threshold <= 1.0: 

84 raise ValueError( 

85 f"dedup_threshold must be between 0.0 and 1.0, got {self.dedup_threshold}" 

86 ) 

87 if not 0.0 <= self.conflict_threshold <= 1.0: 

88 raise ValueError( 

89 f"conflict_threshold must be between 0.0 and 1.0, got {self.conflict_threshold}" 

90 ) 

91 if self.decay_half_life_hours <= 0: 

92 raise ValueError(f"decay_half_life_hours must be > 0, got {self.decay_half_life_hours}") 

93 if self.decay_threshold_hours <= 0: 

94 raise ValueError(f"decay_threshold_hours must be > 0, got {self.decay_threshold_hours}") 

95 if not 0.0 <= self.default_importance <= 1.0: 

96 raise ValueError( 

97 f"default_importance must be between 0.0 and 1.0, got {self.default_importance}" 

98 ) 

99 if self.default_top_k < 1: 

100 raise ValueError(f"default_top_k must be >= 1, got {self.default_top_k}") 

101 if self.max_tokens_default is not None and self.max_tokens_default < 1: 

102 raise ValueError(f"max_tokens_default must be >= 1, got {self.max_tokens_default}") 

103 if not 0.0 <= self.entity_boost_weight <= 1.0: 

104 raise ValueError( 

105 f"entity_boost_weight must be between 0.0 and 1.0, got {self.entity_boost_weight}" 

106 ) 

107 # Validate scoring weights sum to ~1.0 (with some tolerance) 

108 total_hybrid = self.weight_semantic + self.weight_recency + self.weight_bm25 

109 if abs(total_hybrid - 1.0) > 0.01: 

110 raise ValueError( 

111 "Scoring weights (weight_semantic + weight_recency + weight_bm25) " 

112 f"must sum to 1.0, got {total_hybrid}" 

113 ) 

114 total_no_embed = ( 

115 self.weight_semantic_no_embed + self.weight_recency_no_embed + self.weight_importance 

116 ) 

117 if abs(total_no_embed - 1.0) > 0.01: 

118 raise ValueError( 

119 "Scoring weights (weight_semantic_no_embed + weight_recency_no_embed + " 

120 f"weight_importance) must sum to 1.0, got {total_no_embed}" 

121 )