Coverage for tools/web.py: 24%
58 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-29 02:55 +0800
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-29 02:55 +0800
1import requests
2from pydantic import BaseModel, Field
3from qrclaw.tools.registry import register
4from qrclaw.web_search.runtime import run_web_search, WebSearchError
5from qrclaw.logger import get_logger
7logger = get_logger("qrclaw.tools.web")
# Argument schema passed to @register for the web_search tool.
class WebSearchArgs(BaseModel):
    # Required free-text search query; the description string is the
    # user/LLM-facing hint attached to the field.
    query: str = Field(
        ...,
        description="搜索关键词,用自然语言描述想查找的内容",
    )
# Argument schema passed to @register for the web_fetch tool.
class WebFetchArgs(BaseModel):
    # Required URL of the page to fetch (no default is supplied, so the
    # field stays mandatory).
    url: str = Field(
        description="要抓取的网页 URL,例如 https://example.com",
    )
@register(description="联网搜索,获取最新信息,适合查找新闻、文档、技术资料", args_model=WebSearchArgs)
def web_search(query: str) -> str:
    """Run a web search and return the results formatted as Markdown.

    The search itself is delegated to ``run_web_search`` (capped at 5
    results); the provider that served the request is echoed in the heading.

    Args:
        query: Search query text.

    Returns:
        A Markdown string containing the optional AI summary followed by a
        numbered list of results. When the search yields neither a summary
        nor any result, the fixed "no results" message is returned. On
        failure an error message string is returned instead of raising, so
        the tool caller always receives text.
    """
    logger.debug(f"联网搜索: {query}")
    try:
        # Delegate to the web search runtime.
        response = run_web_search(query=query, max_results=5)

        # Nothing at all to show: keep the short "no results" reply.
        if not response.results and not response.answer:
            return "未找到相关结果"

        # Format the output as Markdown.
        lines = [f"### 搜索结果 (Provider: {response.provider})"]
        if response.answer:
            lines.append(f"\n**AI 摘要:**\n{response.answer}\n")

        if not response.results:
            # Keep the AI summary instead of discarding it; just note that
            # there are no individual hits to list.
            lines.append("未找到相关结果")

        for i, r in enumerate(response.results or (), 1):
            lines.append(f"{i}. [{r.title}]({r.url})")
            # A missing snippet must not abort the whole search; treat it
            # as empty text.
            snippet = (r.snippet or "").replace("\n", " ").strip()
            if len(snippet) > 200:
                snippet = snippet[:200] + "..."
            lines.append(f" > {snippet}\n")

        result = "\n".join(lines)
        logger.info(f"搜索成功: {query}, 找到 {response.count} 条结果")
        return result

    except WebSearchError as e:
        # Raised by the search runtime; reported to the caller as a
        # configuration problem and logged at warning level.
        error_msg = f"搜索配置错误: {e}"
        logger.warning(error_msg)
        return error_msg
    except Exception as e:
        # Tool boundary: never propagate, but keep the traceback in the log.
        error_msg = f"搜索执行失败: {e}"
        logger.error(f"搜索失败: {query}, 错误: {e}", exc_info=True)
        return error_msg
@register(description="访问指定网页并提取纯净的 Markdown 正文,适合阅读文章、文档", args_model=WebFetchArgs)
def web_fetch(url: str) -> str:
    """Fetch a page through ``https://r.jina.ai/`` and return its body text.

    The first request asks for a JSON envelope with Markdown content. If the
    response status is not 200, a second plain request (no custom headers) is
    made and its raw text is returned as a best-effort fallback.

    Args:
        url: URL of the page to fetch; it is appended to the reader endpoint.

    Returns:
        The extracted page content, the placeholder "正文为空" when the JSON
        envelope carries no usable content, or an error message string if
        the request or decoding fails (this function does not raise).
    """
    logger.debug(f"抓取网页正文: {url}")
    try:
        jina_url = f"https://r.jina.ai/{url}"
        headers = {
            "Accept": "application/json",
            "X-Return-Format": "markdown",
        }
        response = requests.get(jina_url, headers=headers, timeout=30)

        if response.status_code == 200:
            data = response.json()
            # The envelope may be a non-object or carry "data": null; fall
            # back to an empty payload instead of failing on attribute access.
            payload = data.get("data") if isinstance(data, dict) else None
            if not isinstance(payload, dict):
                payload = {}
            # Use `or` so that empty strings (not only missing keys) fall
            # through to the next candidate and finally to the placeholder.
            content = payload.get("content") or payload.get("text") or "正文为空"
            logger.info(f"网页抓取成功: {url}, 长度: {len(content)}")
            return content

        # Fallback: retry without the JSON/Markdown headers and hand back
        # whatever text comes out. The text is still returned on an error
        # status (best-effort), but it is no longer logged as a success.
        fallback = requests.get(jina_url, timeout=30)
        fallback_text = fallback.text
        if fallback.ok:
            logger.info(f"网页抓取(降级)成功: {url}, 长度: {len(fallback_text)}")
        else:
            logger.warning(f"网页抓取(降级)返回非成功状态: {url}, 状态码: {fallback.status_code}")
        return fallback_text
    except Exception as e:
        # Tool boundary: report the failure as text and keep the traceback,
        # matching the error logging used by web_search.
        error_msg = f"网页抓取失败: {e}"
        logger.error(error_msg, exc_info=True)
        return error_msg