Coverage for agentos/rag/__init__.py: 74%
19 statements
« prev ^ index » next coverage.py v7.14.3, created at 2026-07-02 09:59 +0800
« prev ^ index » next coverage.py v7.14.3, created at 2026-07-02 09:59 +0800
1"""
2RAG 模块入口 — 向量存储 / 文档加载 / 检索生成管道 / 混合搜索。
4v1.5.1: ChromaDB 向量存储 + PDF/DOCX/TXT 文档加载 + 基础 RAG Pipeline。
5v1.9.0: 混合搜索 + BM25 稀疏检索 + 跨编码器重排 + 引用追踪。
6"""
8from agentos.rag.store import VectorStore, ChromaStore
9from agentos.rag.loader import DocumentLoader, load_file, load_directory
10from agentos.rag.pipeline import RAGPipeline
11from agentos.rag.hybrid_search import (
12 HybridSearchEngine,
13 BM25Retriever,
14 DenseRetriever,
15 CrossEncoderReranker,
16 CitationTracker,
17 Citation,
18 SearchResult,
19 FusionMethod,
20)
# Backward-compatible aliases for names exposed by earlier releases.
BaseVectorStore = VectorStore
FAISSVectorStore = None  # FAISS was removed; ChromaDB replaces it
ChromaVectorStore = ChromaStore

# Configuration compatibility aliases (both are plain ``dict``).
ChunkConfig = dict  # chunk_size + chunk_overlap
EmbeddingConfig = dict  # model_name + device
# TextChunker compatibility layer
class TextChunker:
    """Text chunker kept for backward compatibility.

    Thin wrapper that delegates to a ``DocumentLoader`` and returns the
    ``content`` attribute of every document the loader produces.
    """

    def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200):
        """Create the underlying loader with the given chunking settings.

        Args:
            chunk_size: Forwarded to ``DocumentLoader`` as ``chunk_size``.
            chunk_overlap: Forwarded to ``DocumentLoader`` as ``chunk_overlap``.
        """
        self._loader = DocumentLoader(chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    def chunk(self, text: str, source: str = "") -> list[str]:
        """Run the loader's chunking on ``text`` and return each chunk's content.

        Args:
            text: Text handed to the loader for chunking.
            source: Passed through to the loader unchanged.

        Returns:
            The ``content`` of each document returned by the loader, in order.
        """
        # NOTE(review): relies on the loader's private ``_chunk`` method.
        docs = self._loader._chunk(text, source)
        return [d.content for d in docs]

    def chunk_file(self, path: str) -> list[str]:
        """Load ``path`` through the loader and return each document's content.

        Args:
            path: Passed to the loader's ``load_file``.

        Returns:
            The ``content`` of each document returned by the loader, in order.
        """
        docs = self._loader.load_file(path)
        return [d.content for d in docs]
# Public API of the package; governs what ``from agentos.rag import *`` exposes.
__all__ = [
    "VectorStore",
    "ChromaStore",
    "DocumentLoader",
    "load_file",
    "load_directory",
    "RAGPipeline",
    # Hybrid search (v1.9.0)
    "HybridSearchEngine",
    "BM25Retriever",
    "DenseRetriever",
    "CrossEncoderReranker",
    "CitationTracker",
    "Citation",
    "SearchResult",
    "FusionMethod",
    # Backward compatibility
    "BaseVectorStore",
    "FAISSVectorStore",
    "ChromaVectorStore",
    # The config aliases are defined alongside the other compatibility
    # names, so they are exported with them.
    "ChunkConfig",
    "EmbeddingConfig",
    "TextChunker",
]