# wiseflow/core/tasks.py
"""
通过编辑这个脚本可以自定义需要的后台任务
"""
import schedule
import time
from topnews import pipeline
from loguru import logger
from utils.pb_api import PbTalker
import os
from utils.general_utils import get_logger_level
from datetime import datetime, timedelta
import pytz
import requests
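
# Module setup: file logging under PROJECT_DIR and a PocketBase client.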
project_dir = os.environ.get("PROJECT_DIR", "")
if project_dir:
    os.makedirs(project_dir, exist_ok=True)
logger_file = os.path.join(project_dir, 'tasks.log')
dsw_log = get_logger_level()
logger.add(
    logger_file,
    level=dsw_log,
    backtrace=True,
    diagnose=True,
    rotation="50 MB"
)
pb = PbTalker(logger)

utc_now = datetime.now(pytz.utc)
# Subtract one day to get yesterday's UTC time as the initial collection window.
utc_yesterday = utc_now - timedelta(days=1)
utc_last = utc_yesterday.strftime("%Y-%m-%d %H:%M:%S")
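

# task() builds the daily digest:
#   1. read insights updated since the last run from PocketBase,
#   2. pick the top stories for each activated tag via topnews.pipeline,
#   3. drop stories whose articles already appear under another tag,
#   4. format the digest and push it to a WeChat group.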
def task():
    """
    global counter
    sites = pb.read('sites', filter='activated=True')
    urls = []
    for site in sites:
        if not site['per_hours'] or not site['url']:
            continue
        if counter % site['per_hours'] == 0:
            urls.append(site['url'])
    logger.info(f'\033[0;32m task execute loop {counter}\033[0m')
    logger.info(urls)
    if urls:
        sp(sites=urls)
    else:
        if counter % 24 == 0:
            sp()
        else:
            print('\033[0;33mno work for this loop\033[0m')
    counter += 1
    """
    global utc_last
    logger.debug(f'last_collect_time: {utc_last}')
    datas = pb.read(collection_name='insights', filter=f'updated>="{utc_last}"',
                    fields=['id', 'content', 'tag', 'articles'])
    logger.debug(f"got {len(datas)} items")
    # Advance the window: the next run only sees insights updated after this moment.
    utc_last = datetime.now(pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
    logger.debug(f'now_utc_time: {utc_last}')
    tags = pb.read(collection_name='tags', filter='activated=True')
    tags_dict = {item["id"]: item["name"] for item in tags if item["name"]}
    top_news = {}
    for id, name in tags_dict.items():
        logger.debug(f'tag: {name}')
        data = [item for item in datas if item['tag'] == id]
        topnew = pipeline(data, logger)
        if not topnew:
            logger.debug(f'no top news for {name}')
            continue
        top_news[id] = {}
        for content, articles in topnew.items():
            content_urls = [pb.read('articles', filter=f'id="{a}"', fields=['url'])[0]['url'] for a in articles]
            # Deduplicate across tags: if this story shares an article URL with one
            # already kept, drop it from whichever tag has the larger result set.
            to_skip = False
            for k, v in top_news.items():
                to_del_key = None
                for c, u in v.items():
                    if not set(content_urls).isdisjoint(set(u)):
                        if len(topnew) > len(v):
                            to_skip = True
                        else:
                            to_del_key = c
                        break
                if to_del_key:
                    del top_news[k][to_del_key]
                if to_skip:
                    break
            if not to_skip:
                top_news[id][content] = content_urls
        if not top_news[id]:
            del top_news[id]
    if not top_news:
        logger.info("no top news today")
        return

    # Serialize to a string, grouped under three fixed channel headings.
    top_news_text = {"#党建引领基层治理": [],  # party building & grassroots governance
                     "#数字社区": [],  # digital community
                     "#优秀活动案例": []}  # outstanding activity cases
    for id, v in top_news.items():
        # top_news[id] = {content: '\n\n'.join(urls) for content, urls in v.items()}
        top_news[id] = {content: urls[0] for content, urls in v.items()}  # keep only the first URL per story
        if id == 's3kqj9ek8nvtthr':
            top_news_text["#数字社区"].append("\n".join(f"{content}\n{urls}" for content, urls in top_news[id].items()))
        elif id == 'qpcgotbqyz3a617':
            top_news_text["#优秀活动案例"].append("\n".join(f"{content}\n{urls}" for content, urls in top_news[id].items()))
        else:
            top_news_text["#党建引领基层治理"].append("\n".join(f"{content}\n{urls}" for content, urls in top_news[id].items()))
    top_news_text = {k: "\n".join(v) for k, v in top_news_text.items()}
    top_news_text = "\n\n".join(f"{k}\n{v}" for k, v in top_news_text.items())
    logger.info(top_news_text)
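
    # Deliver through a local HTTP gateway; /api/sendtxtmsg on port 8088 is
    # assumed to be a WeChat bot bridge, with "wxid" identifying the target chat.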
    data = {
        "wxid": "R:10860349446619856",
        "content": top_news_text
    }
    try:
        response = requests.post("http://localhost:8088/api/sendtxtmsg", json=data)
        if response.status_code == 200:
            logger.info("send message to wechat success")
            time.sleep(1)
            # Follow-up @all notice: "[Sun] Today's Linxiaozhu briefing is here!"
            data = {
                "wxid": "R:10860349446619856",
                "content": "[太阳] 今日份的临小助内参来啦!",
                "atlist": ["@all"]
            }
            try:
                response = requests.post("http://localhost:8088/api/sendtxtmsg", json=data)
                if response.status_code == 200:
                    logger.info("send notify to wechat success")
            except Exception as e:
                logger.error(f"send notify to wechat failed: {e}")
    except Exception as e:
        logger.error(f"send message to wechat failed: {e}")


# Run once at startup, then every day at 07:38 (server local time).
schedule.every().day.at("07:38").do(task)
task()
while True:
    schedule.run_pending()
    time.sleep(60)