import websockets import json import re import httpx import asyncio import os, sys core_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'core') sys.path.append(core_path) from general_process import main_process, wiseflow_logger from typing import Optional import logging logging.getLogger("httpx").setLevel(logging.WARNING) # 千万注意扫码登录时不要选择“同步历史消息”，否则会造成 bot 上来挨个回复历史消息 # 先检查下 wx 的登录状态，同时获取已登录微信的 wxid WX_BOT_ENDPOINT = os.environ.get('WX_BOT_ENDPOINT', '127.0.0.1:8066') wx_url = f"http://{WX_BOT_ENDPOINT}/api/" try: # 发送GET请求 response = httpx.get(f"{wx_url}checklogin") response.raise_for_status() # 检查HTTP响应状态码是否为200 # 解析JSON响应 data = response.json() # 检查status字段 if data['data']['status'] == 1: # 记录wxid self_wxid = data['data']['wxid'] wiseflow_logger.info(f"已登录微信账号: {self_wxid}") else: # 抛出异常 wiseflow_logger.error("未检测到任何登录信息，将退出") raise ValueError("登录失败，status不为1") except Exception as e: wiseflow_logger.error(f"无法链接微信端点:{wx_url}, 错误：\n{e}") raise ValueError("登录失败，无法连接") # 获取登录微信昵称，用于后面判断是否@自己的消息 response = httpx.get(f"{wx_url}userinfo") response.raise_for_status() # 检查HTTP响应状态码是否为200 # 解析JSON响应 data = response.json() self_nickname = data['data'].get('nickname', " ") wiseflow_logger.info(f"self_nickname: {self_nickname}") # 如果要选定只监控部分公众号，请在同一文件夹内创建 config.json 文件，内容为要监控的公众号列表 # 注意这里要写公众号的原始id，即 gh_ 开头的id, 可以通过历史 logger 获取 config_file = 'config.json' if not os.path.exists(config_file): config = None else: with open(config_file, 'r', encoding='utf-8') as f: config = json.load(f) #如下 pattern 仅适用于public msg的解析，群内分享的公号文章不在此列 # The XML parsing scheme is not used because there are abnormal characters in the XML code extracted from the weixin public_msg item_pattern = re.compile(r'(.*?)', re.DOTALL) url_pattern = re.compile(r'') async def get_public_msg(websocket_uri): reconnect_attempts = 0 max_reconnect_attempts = 3 while True: try: async with websockets.connect(websocket_uri, max_size=10 * 1024 * 1024) as websocket: while True: response = await websocket.recv() datas = json.loads(response) todo_urls = set() for data in datas["data"]: if "StrTalker" not in data or "Content" not in data: wiseflow_logger.warning(f"invalid data:\n{data}") continue user_id = data["StrTalker"] items = item_pattern.findall(data["Content"]) # Iterate through all < item > content, extracting < url > and < summary > for item in items: url_match = url_pattern.search(item) url = url_match.group(1) if url_match else None if not url: wiseflow_logger.warning(f"can not find url in \n{item}") continue # URL processing, http is replaced by https, and the part after chksm is removed. url = url.replace('http://', 'https://') cut_off_point = url.find('chksm=') if cut_off_point != -1: url = url[:cut_off_point - 1] # summary_match = summary_pattern.search(item) # addition = summary_match.group(1) if summary_match else None todo_urls.add(url) if todo_urls: await main_process(todo_urls) except websockets.exceptions.ConnectionClosedError as e: wiseflow_logger.error(f"Connection closed with exception: {e}") reconnect_attempts += 1 if reconnect_attempts <= max_reconnect_attempts: wiseflow_logger.info(f"Reconnecting attempt {reconnect_attempts}...") await asyncio.sleep(1) else: wiseflow_logger.error("Max reconnect attempts reached. Exiting.") break except Exception as e: wiseflow_logger.error(f"PublicMsgHandler error: {e}") uri_public = f"ws://{WX_BOT_ENDPOINT}/ws/publicMsg" asyncio.run(get_public_msg(uri_public))