import websockets
import json
import re
import httpx
import asyncio
import os, sys
core_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', 'core')
sys.path.append(core_path)
from general_process import main_process, wiseflow_logger
from typing import Optional
import logging
logging.getLogger("httpx").setLevel(logging.WARNING)
# 千万注意扫码登录时不要选择“同步历史消息”,否则会造成 bot 上来挨个回复历史消息
# 先检查下 wx 的登录状态,同时获取已登录微信的 wxid
WX_BOT_ENDPOINT = os.environ.get('WX_BOT_ENDPOINT', '127.0.0.1:8066')
wx_url = f"http://{WX_BOT_ENDPOINT}/api/"
try:
# 发送GET请求
response = httpx.get(f"{wx_url}checklogin")
response.raise_for_status() # 检查HTTP响应状态码是否为200
# 解析JSON响应
data = response.json()
# 检查status字段
if data['data']['status'] == 1:
# 记录wxid
self_wxid = data['data']['wxid']
wiseflow_logger.info(f"已登录微信账号: {self_wxid}")
else:
# 抛出异常
wiseflow_logger.error("未检测到任何登录信息,将退出")
raise ValueError("登录失败,status不为1")
except Exception as e:
wiseflow_logger.error(f"无法链接微信端点:{wx_url}, 错误:\n{e}")
raise ValueError("登录失败,无法连接")
# 获取登录微信昵称,用于后面判断是否@自己的消息
response = httpx.get(f"{wx_url}userinfo")
response.raise_for_status() # 检查HTTP响应状态码是否为200
# 解析JSON响应
data = response.json()
self_nickname = data['data'].get('nickname', " ")
wiseflow_logger.info(f"self_nickname: {self_nickname}")
# 如果要选定只监控部分公众号,请在同一文件夹内创建 config.json 文件,内容为要监控的公众号列表
# 注意这里要写公众号的原始id,即 gh_ 开头的id, 可以通过历史 logger 获取
config_file = 'config.json'
if not os.path.exists(config_file):
config = None
else:
with open(config_file, 'r', encoding='utf-8') as f:
config = json.load(f)
#如下 pattern 仅适用于public msg的解析,群内分享的公号文章不在此列
# The XML parsing scheme is not used because there are abnormal characters in the XML code extracted from the weixin public_msg
item_pattern = re.compile(r'- (.*?)
', re.DOTALL)
url_pattern = re.compile(r'')
async def get_public_msg(websocket_uri):
reconnect_attempts = 0
max_reconnect_attempts = 3
while True:
try:
async with websockets.connect(websocket_uri, max_size=10 * 1024 * 1024) as websocket:
while True:
response = await websocket.recv()
datas = json.loads(response)
todo_urls = set()
for data in datas["data"]:
if "StrTalker" not in data or "Content" not in data:
wiseflow_logger.warning(f"invalid data:\n{data}")
continue
user_id = data["StrTalker"]
items = item_pattern.findall(data["Content"])
# Iterate through all < item > content, extracting < url > and < summary >
for item in items:
url_match = url_pattern.search(item)
url = url_match.group(1) if url_match else None
if not url:
wiseflow_logger.warning(f"can not find url in \n{item}")
continue
# URL processing, http is replaced by https, and the part after chksm is removed.
url = url.replace('http://', 'https://')
cut_off_point = url.find('chksm=')
if cut_off_point != -1:
url = url[:cut_off_point - 1]
# summary_match = summary_pattern.search(item)
# addition = summary_match.group(1) if summary_match else None
todo_urls.add(url)
if todo_urls:
await main_process(todo_urls)
except websockets.exceptions.ConnectionClosedError as e:
wiseflow_logger.error(f"Connection closed with exception: {e}")
reconnect_attempts += 1
if reconnect_attempts <= max_reconnect_attempts:
wiseflow_logger.info(f"Reconnecting attempt {reconnect_attempts}...")
await asyncio.sleep(1)
else:
wiseflow_logger.error("Max reconnect attempts reached. Exiting.")
break
except Exception as e:
wiseflow_logger.error(f"PublicMsgHandler error: {e}")
uri_public = f"ws://{WX_BOT_ENDPOINT}/ws/publicMsg"
asyncio.run(get_public_msg(uri_public))