Coverage for agentos/rag/__init__.py: 74%

19 statements  

« prev     ^ index     » next       coverage.py v7.14.3, created at 2026-07-02 09:59 +0800

1""" 

2RAG 模块入口 — 向量存储 / 文档加载 / 检索生成管道 / 混合搜索。 

3 

4v1.5.1: ChromaDB 向量存储 + PDF/DOCX/TXT 文档加载 + 基础 RAG Pipeline。 

5v1.9.0: 混合搜索 + BM25 稀疏检索 + 跨编码器重排 + 引用追踪。 

6""" 

7 

8from agentos.rag.store import VectorStore, ChromaStore 

9from agentos.rag.loader import DocumentLoader, load_file, load_directory 

10from agentos.rag.pipeline import RAGPipeline 

11from agentos.rag.hybrid_search import ( 

12 HybridSearchEngine, 

13 BM25Retriever, 

14 DenseRetriever, 

15 CrossEncoderReranker, 

16 CitationTracker, 

17 Citation, 

18 SearchResult, 

19 FusionMethod, 

20) 

21 

# Backward-compatible aliases for the vector store classes.
BaseVectorStore = VectorStore
ChromaVectorStore = ChromaStore
# FAISS has been removed and replaced by ChromaDB; the old name is bound to None.
FAISSVectorStore = None

# Configuration compatibility aliases; both are the built-in dict type.
#   ChunkConfig:     chunk_size + chunk_overlap
#   EmbeddingConfig: model_name + device
ChunkConfig = EmbeddingConfig = dict

30 

31# TextChunker 兼容层 

class TextChunker:
    """Text chunker kept for backward compatibility.

    Wraps a ``DocumentLoader`` and exposes only the ``content`` of the
    documents that loader returns.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """Create the wrapped loader with the given chunking parameters."""
        loader_options = {"chunk_size": chunk_size, "chunk_overlap": chunk_overlap}
        self._loader = DocumentLoader(**loader_options)

    def chunk(self, text: str, source: str = "") -> list[str]:
        """Pass ``text`` and ``source`` to the loader's ``_chunk`` and return
        the ``content`` of each resulting document, in order."""
        return [piece.content for piece in self._loader._chunk(text, source)]

    def chunk_file(self, path: str) -> list[str]:
        """Pass ``path`` to the loader's ``load_file`` and return the
        ``content`` of each resulting document, in order."""
        return [piece.content for piece in self._loader.load_file(path)]

44 

45 

# Public API of the package: the names exported by `from agentos.rag import *`.
__all__ = [
    # Vector stores, document loading and the RAG pipeline
    "VectorStore",
    "ChromaStore",
    "DocumentLoader",
    "load_file",
    "load_directory",
    "RAGPipeline",
    # Hybrid search (v1.9.0)
    "HybridSearchEngine",
    "BM25Retriever",
    "DenseRetriever",
    "CrossEncoderReranker",
    "CitationTracker",
    "Citation",
    "SearchResult",
    "FusionMethod",
    # Backward compatibility
    "BaseVectorStore",
    "FAISSVectorStore",
    "ChromaVectorStore",
    "TextChunker",
    # Configuration compatibility aliases; defined in this module but
    # previously missing here, so star-imports silently dropped them.
    "ChunkConfig",
    "EmbeddingConfig",
]