Coverage for tools / web.py: 24%

58 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-29 02:55 +0800

1import requests 

2from pydantic import BaseModel, Field 

3from qrclaw.tools.registry import register 

4from qrclaw.web_search.runtime import run_web_search, WebSearchError 

5from qrclaw.logger import get_logger 

6 

# Module-level logger shared by the web tools defined in this file.
logger = get_logger("qrclaw.tools.web")

8 

# Argument schema for the `web_search` tool.
# NOTE: documented with comments rather than a class docstring so that the
# JSON schema generated from this model is left untouched.
class WebSearchArgs(BaseModel):
    # Free-text search query; required (no default value).
    query: str = Field(
        ...,
        description="搜索关键词,用自然语言描述想查找的内容",
    )

11 

# Argument schema for the `web_fetch` tool.
# NOTE: documented with comments rather than a class docstring so that the
# JSON schema generated from this model is left untouched.
class WebFetchArgs(BaseModel):
    # Address of the page to fetch; required (no default value).
    url: str = Field(
        ...,
        description="要抓取的网页 URL,例如 https://example.com",
    )

14 

@register(description="联网搜索,获取最新信息,适合查找新闻、文档、技术资料", args_model=WebSearchArgs)
def web_search(query: str) -> str:
    """Run a web search and render the outcome as Markdown.

    The search itself is delegated to ``run_web_search`` (at most 5 results
    are requested). The rendered text starts with a header naming the
    provider, optionally followed by the provider's answer summary, then a
    numbered list of ``[title](url)`` links each followed by a snippet
    truncated to 200 characters.

    Args:
        query: Natural-language search query.

    Returns:
        The Markdown text on success. If there are neither results nor an
        answer summary, a fixed "no results" message. On failure, a
        human-readable error message (this function never raises).
    """
    logger.debug(f"联网搜索: {query}")
    try:
        response = run_web_search(query=query, max_results=5)

        lines = [f"### 搜索结果 (Provider: {response.provider})"]
        if response.answer:
            lines.append(f"\n**AI 摘要:**\n{response.answer}\n")

        if not response.results:
            if response.answer:
                # The provider produced a summary but no individual hits:
                # return the summary instead of discarding it.
                logger.info(f"搜索成功: {query}, 找到 0 条结果")
                return "\n".join(lines)
            return "未找到相关结果"

        for i, r in enumerate(response.results, 1):
            lines.append(f"{i}. [{r.title}]({r.url})")
            # A missing snippet must not abort the whole search.
            snippet = (r.snippet or "").replace("\n", " ").strip()
            if len(snippet) > 200:
                snippet = snippet[:200] + "..."
            lines.append(f" > {snippet}\n")

        result = "\n".join(lines)
        logger.info(f"搜索成功: {query}, 找到 {response.count} 条结果")
        return result

    except WebSearchError as e:
        # Raised by the search runtime; reported to the caller as a
        # configuration problem and logged at warning level only.
        error_msg = f"搜索配置错误: {e}"
        logger.warning(error_msg)
        return error_msg
    except Exception as e:
        # Tool boundary: turn any unexpected failure into a message for the
        # caller while keeping the traceback in the log.
        error_msg = f"搜索执行失败: {e}"
        logger.error(f"搜索失败: {query}, 错误: {e}", exc_info=True)
        return error_msg

52 

@register(description="访问指定网页并提取纯净的 Markdown 正文,适合阅读文章、文档", args_model=WebFetchArgs)
def web_fetch(url: str) -> str:
    """Fetch a web page through the ``r.jina.ai`` proxy and return its text.

    A first request asks for a JSON response in Markdown format. On HTTP 200
    the ``data.content`` field of the JSON payload is returned, falling back
    to ``data.text`` and finally to a fixed "empty body" placeholder. On any
    other status the same URL is requested again without the extra headers
    and the raw response text is returned, provided that request succeeds.

    Args:
        url: Address of the page to fetch.

    Returns:
        The extracted page text on success, or a human-readable error
        message on failure (this function never raises).
    """
    logger.debug(f"抓取网页正文: {url}")
    try:
        jina_url = f"https://r.jina.ai/{url}"
        headers = {
            "Accept": "application/json",
            "X-Return-Format": "markdown",
        }
        response = requests.get(jina_url, headers=headers, timeout=30)

        if response.status_code == 200:
            payload = response.json()
            # "data" may be absent, null, or not an object; treat all of
            # those as an empty body instead of failing on ``.get``.
            body = payload.get("data") if isinstance(payload, dict) else None
            if not isinstance(body, dict):
                body = {}
            # Use the placeholder when both fields are missing or empty.
            content = body.get("content") or body.get("text") or "正文为空"
            logger.info(f"网页抓取成功: {url}, 长度: {len(content)}")
            return content

        # Degraded path: retry without the JSON/Markdown headers.
        fallback = requests.get(jina_url, timeout=30)
        # Do not report an HTTP error page as a successful fetch.
        fallback.raise_for_status()
        fallback_text = fallback.text
        logger.info(f"网页抓取(降级)成功: {url}, 长度: {len(fallback_text)}")
        return fallback_text
    except Exception as e:
        # Tool boundary: turn any failure into a message for the caller
        # while keeping the traceback in the log.
        error_msg = f"网页抓取失败: {e}"
        logger.error(error_msg, exc_info=True)
        return error_msg