Mirror of https://github.com/TeamWiseFlow/wiseflow.git (synced 2025-01-23 02:20:20 +08:00)

Commit 55b27c8984: use pb for config
Parent commit: ac021d9ee0
@@ -3,7 +3,7 @@ import time
import json
import uuid
from get_logger import get_logger
-from pb_api import PbTalker
+from pb_api import pb
from get_report import get_report
from get_search import search_insight
from tranlsation_volcengine import text_translate

@@ -20,7 +20,6 @@ class BackendService:

# 2. load the llm
# self.llm = LocalLlmWrapper()
-self.pb = PbTalker(self.logger)
self.memory = {}
# self.scholar = Scholar(initial_file_dir=os.path.join(self.project_dir, "files"), use_gpu=use_gpu)
self.logger.info(f'{self.name} init success.')

@@ -33,7 +32,7 @@ class BackendService:
:return: 成功的话返回更新后的insight_id(其实跟原id一样), 不成功返回空字符
"""
self.logger.debug(f'got new report request insight_id {insight_id}')
-insight = self.pb.read('insights', filter=f'id="{insight_id}"')
+insight = pb.read('insights', filter=f'id="{insight_id}"')
if not insight:
self.logger.error(f'insight {insight_id} not found')
return self.build_out(-2, 'insight not found')

@@ -43,8 +42,7 @@ class BackendService:
self.logger.error(f'insight {insight_id} has no articles')
return self.build_out(-2, 'can not find articles for insight')

-article_list = [self.pb.read('articles',
-fields=['title', 'abstract', 'content', 'url', 'publish_time'], filter=f'id="{_id}"')
+article_list = [pb.read('articles', fields=['title', 'abstract', 'content', 'url', 'publish_time'], filter=f'id="{_id}"')
for _id in article_ids]
article_list = [_article[0] for _article in article_list if _article]

@@ -66,7 +64,7 @@ class BackendService:

if flag:
file = open(docx_file, 'rb')
-message = self.pb.upload('insights', insight_id, 'docx', f'{insight_id}.docx', file)
+message = pb.upload('insights', insight_id, 'docx', f'{insight_id}.docx', file)
file.close()
if message:
self.logger.debug(f'report success finish and update to pb-{message}')

@@ -96,8 +94,7 @@ class BackendService:
en_texts = []
k = 1
for article_id in article_ids:
-raw_article = self.pb.read(collection_name='articles', fields=['abstract', 'title', 'translation_result'],
-filter=f'id="{article_id}"')
+raw_article = pb.read(collection_name='articles', fields=['abstract', 'title', 'translation_result'], filter=f'id="{article_id}"')
if not raw_article or not raw_article[0]:
self.logger.warning(f'get article {article_id} failed, skipping')
flag = -2

@@ -118,14 +115,11 @@ class BackendService:
translate_result = text_translate(en_texts, logger=self.logger)
if translate_result and len(translate_result) == 2*len(key_cache):
for i in range(0, len(translate_result), 2):
-related_id = self.pb.add(collection_name='article_translation',
-body={'title': translate_result[i], 'abstract': translate_result[i+1],
-'raw': key_cache[int(i/2)]})
+related_id = pb.add(collection_name='article_translation', body={'title': translate_result[i], 'abstract': translate_result[i+1], 'raw': key_cache[int(i/2)]})
if not related_id:
self.logger.warning(f'write article_translation {key_cache[int(i/2)]} failed')
else:
-_ = self.pb.update(collection_name='articles', id=key_cache[int(i/2)],
-body={'translation_result': related_id})
+_ = pb.update(collection_name='articles', id=key_cache[int(i/2)], body={'translation_result': related_id})
if not _:
self.logger.warning(f'update article {key_cache[int(i/2)]} failed')
self.logger.debug('done')

@@ -148,14 +142,11 @@ class BackendService:
translate_result = text_translate(en_texts, logger=self.logger)
if translate_result and len(translate_result) == 2*len(key_cache):
for i in range(0, len(translate_result), 2):
-related_id = self.pb.add(collection_name='article_translation',
-body={'title': translate_result[i], 'abstract': translate_result[i+1],
-'raw': key_cache[int(i/2)]})
+related_id = pb.add(collection_name='article_translation', body={'title': translate_result[i], 'abstract': translate_result[i+1], 'raw': key_cache[int(i/2)]})
if not related_id:
self.logger.warning(f'write article_translation {key_cache[int(i/2)]} failed')
else:
-_ = self.pb.update(collection_name='articles', id=key_cache[int(i/2)],
-body={'translation_result': related_id})
+_ = pb.update(collection_name='articles', id=key_cache[int(i/2)], body={'translation_result': related_id})
if not _:
self.logger.warning(f'update article {key_cache[int(i/2)]} failed')
self.logger.debug('done')

@@ -172,14 +163,14 @@ class BackendService:
:return: 成功的话返回更新后的insight_id(其实跟原id一样), 不成功返回空字符
"""
self.logger.debug(f'got search request for insight: {insight_id}')
-insight = self.pb.read('insights', filter=f'id="{insight_id}"')
+insight = pb.read('insights', filter=f'id="{insight_id}"')
if not insight:
self.logger.error(f'insight {insight_id} not found')
return self.build_out(-2, 'insight not found')

article_ids = insight[0]['articles']
if article_ids:
-article_list = [self.pb.read('articles', fields=['url'], filter=f'id="{_id}"') for _id in article_ids]
+article_list = [pb.read('articles', fields=['url'], filter=f'id="{_id}"') for _id in article_ids]
url_list = [_article[0]['url'] for _article in article_list if _article]
else:
url_list = []

@@ -190,7 +181,7 @@ class BackendService:
return self.build_out(flag, 'search engine error or no result')

for item in search_result:
-new_article_id = self.pb.add(collection_name='articles', body=item)
+new_article_id = pb.add(collection_name='articles', body=item)
if new_article_id:
article_ids.append(new_article_id)
else:

@@ -198,7 +189,7 @@ class BackendService:
with open(os.path.join(self.cache_url, 'cache_articles.json'), 'a', encoding='utf-8') as f:
json.dump(item, f, ensure_ascii=False, indent=4)

-message = self.pb.update(collection_name='insights', id=insight_id, body={'articles': article_ids})
+message = pb.update(collection_name='insights', id=insight_id, body={'articles': article_ids})
if message:
self.logger.debug(f'insight search success finish and update to pb-{message}')
return self.build_out(11, insight_id)
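A note on the translation loops in the hunks above: text_translate is expected to return the translated title and abstract for each queued article, interleaved, which is why the result is walked in steps of two. A standalone sketch of that pairing (the stub translator and article ids below are illustrative, not part of the commit):

# Standalone sketch of the interleaved title/abstract pairing; fake_translate stands in
# for text_translate and the article ids are invented.
def fake_translate(texts: list[str]) -> list[str]:
    return [f'zh({t})' for t in texts]

key_cache = ['art_001', 'art_002']                              # article ids queued for translation
en_texts = ['Title 1', 'Abstract 1', 'Title 2', 'Abstract 2']   # title/abstract per article, interleaved

translate_result = fake_translate(en_texts)
assert len(translate_result) == 2 * len(key_cache)

for i in range(0, len(translate_result), 2):
    body = {'title': translate_result[i],
            'abstract': translate_result[i + 1],
            'raw': key_cache[int(i / 2)]}
    print(body)  # the real code writes this body via pb.add(collection_name='article_translation', ...)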
@@ -4,18 +4,31 @@
import schedule
import time
from work_process import ServiceProcesser
+from pb_api import pb

sp = ServiceProcesser()
+counter = 0


+# 每小时唤醒一次,如果pb的sites表中有信源,会挑取符合周期的信源执行,没有的话,则每24小时执行专有爬虫一次
def task():
-with open('../sites.txt', 'r', encoding='utf-8') as f:
-urls = [line.strip() for line in f.readlines() if line.strip()]
-sp(sites=urls)
+global counter
+sites = pb.read('sites', filter='activated=True')
+urls = []
+for site in sites:
+if not site['per_hours'] or not site['url']:
+continue
+if counter % site['per_hours'] == 0:
+urls.append(site['url'])
+counter += 1
+if urls:
+sp(sites=urls)
+else:
+if counter % 24 == 0:
+sp()


-# 每天凌晨1点运行任务
-schedule.every().day.at("01:17").do(task)
+schedule.every().hour.do(task)

while True:
schedule.run_pending()
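A dry-run sketch of the polling behaviour introduced above: the job now wakes every hour, and a site whose per_hours is n is picked on every n-th wake-up; when no sites are due, the proprietary crawlers run once per 24 ticks. The site records below are made up for illustration:

# Dry run of the counter/per_hours selection used in task(); no pb and no schedule needed.
sites = [
    {'url': 'https://example.com/a', 'per_hours': 1},   # picked every hour
    {'url': 'https://example.com/b', 'per_hours': 6},   # picked every 6th hour
]

counter = 0
for hour in range(12):
    urls = [s['url'] for s in sites
            if s['per_hours'] and s['url'] and counter % s['per_hours'] == 0]
    counter += 1
    if urls:
        print(hour, 'crawl', urls)
    elif counter % 24 == 0:
        print(hour, 'no sites due, run proprietary crawlers')
    else:
        print(hour, 'idle')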
@@ -8,37 +8,63 @@ from general_utils import isChinesePunctuation, is_chinese
from tranlsation_volcengine import text_translate
import time
import re
-import configparser
+from pb_api import pb


max_tokens = 4000
relation_theshold = 0.525

-config = configparser.ConfigParser()
-config.read('../config.ini')
+role_config = pb.read(collection_name='roleplays', filter=f'activated=True')
+_role_config_id = ''
+if role_config:
+character = role_config[0]['character']
+focus = role_config[0]['focus']
+focus_type = role_config[0]['focus_type']
+good_sample1 = role_config[0]['good_sample1']
+good_sample2 = role_config[0]['good_sample2']
+bad_sample = role_config[0]['bad_sample']
+_role_config_id = role_config[0]['id']
+else:
+character, good_sample1, focus, focus_type, good_sample2, bad_sample = '', '', '', '', '', ''
+
+if not character:
+character = input('请为首席情报官指定角色设定(eg. 来自中国的网络安全情报专家):\n')
+_role_config_id = pb.add(collection_name='roleplays', body={'character': character, 'activated': True})
+
+if not _role_config_id:
+raise Exception('pls check pb data, 无法获取角色设定')
+
+if not (focus and focus_type and good_sample1 and good_sample2 and bad_sample):
+focus = input('请为首席情报官指定关注点(eg. 中国关注的网络安全新闻):\n')
+focus_type = input('请为首席情报官指定关注点类型(eg. 网络安全新闻):\n')
+good_sample1 = input('请给出一个你期望的情报描述示例(eg. 黑客组织Rhysida声称已入侵中国国有能源公司): \n')
+good_sample2 = input('请再给出一个理想示例(eg. 差不多一百万份包含未成年人数据(包括家庭地址和照片)的文件对互联网上的任何人都开放,对孩子构成威胁): \n')
+bad_sample = input('请给出一个你不期望的情报描述示例(eg. 黑客组织活动最近频发): \n')
+_ = pb.update(collection_name='roleplays', id=_role_config_id, body={'focus': focus, 'focus_type': focus_type, 'good_sample1': good_sample1, 'good_sample2': good_sample2, 'bad_sample': bad_sample})

# 实践证明,如果强调让llm挖掘我国值得关注的线索,则挖掘效果不好(容易被新闻内容误导,错把别的国家当成我国,可能这时新闻内有我国这样的表述)
# step by step 如果是内心独白方式,输出格式包含两种,难度增加了,qwen-max不能很好的适应,也许可以改成两步,第一步先输出线索列表,第二步再会去找对应的新闻编号
# 但从实践来看,这样做的性价比并不高,且会引入新的不确定性。
-_first_stage_prompt = f'''你是一名{config['prompts']['character']},你将被给到一个新闻列表,新闻文章用XML标签分隔。请对此进行分析,挖掘出特别值得{config['prompts']['focus']}线索。你给出的线索应该足够具体,而不是同类型新闻的归类描述,好的例子如:
-"""{config['prompts']['good_sample1']}"""
+_first_stage_prompt = f'''你是一名{character},你将被给到一个新闻列表,新闻文章用XML标签分隔。请对此进行分析,挖掘出特别值得{focus}线索。你给出的线索应该足够具体,而不是同类型新闻的归类描述,好的例子如:
+"""{good_sample1}"""
不好的例子如:
-"""{config['prompts']['bad_sample']}"""
+"""{bad_sample}"""

请从头到尾仔细阅读每一条新闻的内容,不要遗漏,然后列出值得关注的线索,每条线索都用一句话进行描述,最终按一条一行的格式输出,并整体用三引号包裹,如下所示:
"""
-{config['prompts']['good_sample1']}
-{config['prompts']['good_sample2']}
+{good_sample1}
+{good_sample2}
"""

不管新闻列表是何种语言,请仅用中文输出分析结果。'''

-_rewrite_insight_prompt = f'''你是一名{config['prompts']['character']},你将被给到一个新闻列表,新闻文章用 XML 标签分隔。请对此进行分析,从中挖掘出一条最值得关注的{config['prompts']['focus_type']}线索。你给出的线索应该足够具体,而不是同类型新闻的归类描述,好的例子如:
-"""{config['prompts']['good_sample1']}"""
+_rewrite_insight_prompt = f'''你是一名{character},你将被给到一个新闻列表,新闻文章用XML标签分隔。请对此进行分析,从中挖掘出一条最值得关注的{focus_type}线索。你给出的线索应该足够具体,而不是同类型新闻的归类描述,好的例子如:
+"""{good_sample1}"""
不好的例子如:
-"""{config['prompts']['bad_sample']}"""
+"""{bad_sample}"""

请保证只输出一条最值得关注的线索,线索请用一句话描述,并用三引号包裹输出,如下所示:
-"""{config['prompts']['good_sample1']}"""
+"""{good_sample1}"""

不管新闻列表是何种语言,请仅用中文输出分析结果。'''
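The prompt templates above assume one activated record in the new roleplays collection. A sketch of the record shape that pb.read(collection_name='roleplays', filter='activated=True') is expected to return (field values are the former config.ini defaults; the id is a placeholder):

# Illustrative shape only; the real record is created interactively (the input() fallback
# above) or through the PocketBase admin UI.
role_config = [{
    'id': 'xxxxxxxxxxxxxxx',   # placeholder record id
    'activated': True,
    'character': '来自中国的网络安全情报专家',
    'focus': '中国关注的网络安全新闻',
    'focus_type': '网络安全新闻',
    'good_sample1': '黑客组织Rhysida声称已入侵中国国有能源公司',
    'good_sample2': '差不多一百万份包含未成年人数据(包括家庭地址和照片)的文件对互联网上的任何人都开放,对孩子构成威胁',
    'bad_sample': '黑客组织活动最近频发',
    'report_type': '网络安全情报',
}]
character = role_config[0]['character']   # consumed by the prompt templates above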
@@ -7,14 +7,31 @@ from docx.shared import Pt, RGBColor
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from datetime import datetime
from general_utils import isChinesePunctuation
-import configparser
+from pb_api import pb

# qwen-72b-chat支持最大30k输入,考虑prompt其他部分,content不应超过30000字符长度
# 如果换qwen-max(最大输入6k),这里就要换成6000,但这样很多文章不能分析了
# 本地部署模型(qwen-14b这里可能仅支持4k输入,可能根本这套模式就行不通)
max_input_tokens = 30000
-config = configparser.ConfigParser()
-config.read('../config.ini')
+role_config = pb.read(collection_name='roleplays', filter=f'activated=True')
+_role_config_id = ''
+if role_config:
+character = role_config[0]['character']
+report_type = role_config[0]['report_type']
+_role_config_id = role_config[0]['id']
+else:
+character, report_type = '', ''
+
+if not character:
+character = input('请为首席情报官指定角色设定(eg. 来自中国的网络安全情报专家):\n')
+_role_config_id = pb.add(collection_name='roleplays', body={'character': character, 'activated': True})
+
+if not _role_config_id:
+raise Exception('pls check pb data无法获取角色设定')
+
+if not report_type:
+report_type = input('请为首席情报官指定报告类型(eg. 网络安全情报):\n')
+_ = pb.update(collection_name='roleplays', id=_role_config_id, body={'report_type': report_type})


def get_report(insigt: str, articles: list[dict], memory: str, topics: list[str], comment: str, docx_file: str, logger=None) -> (bool, str):

@@ -44,7 +61,7 @@ def get_report(insigt: str, articles: list[dict], memory: str, topics: list[str]
paragraphs = re.findall("、(.*?)】", memory)
if set(topics) <= set(paragraphs):
logger.debug("no change in Topics, need modified the report")
-system_prompt = f'''你是一名{config['prompts']['character']},你近日向上级提交了一份{config['prompts']['report_type']}报告,如下是报告原文。接下来你将收到来自上级部门的修改意见,请据此修改你的报告:
+system_prompt = f'''你是一名{character},你近日向上级提交了一份{report_type}报告,如下是报告原文。接下来你将收到来自上级部门的修改意见,请据此修改你的报告:
报告原文:
"""{memory}"""
'''

@@ -66,7 +83,7 @@ def get_report(insigt: str, articles: list[dict], memory: str, topics: list[str]
break

logger.debug(f"articles context length: {len(texts)}")
-system_prompt = f'''你是一名{config['prompts']['character']},在近期的工作中我们从所关注的网站中发现了一条重要的{config['prompts']['report_type']}线索,线索和相关文章(用XML标签分隔)如下:
+system_prompt = f'''你是一名{character},在近期的工作中我们从所关注的网站中发现了一条重要的{report_type}线索,线索和相关文章(用XML标签分隔)如下:
情报线索: """{insigt} """
相关文章:
{texts}
@@ -2,11 +2,15 @@ import os
from pocketbase import PocketBase # Client also works the same
from pocketbase.client import FileUpload
from typing import BinaryIO
+from get_logger import get_logger


class PbTalker:
-def __init__(self, logger=None) -> None:
-self.logger = logger
+def __init__(self) -> None:
+self.project_dir = os.environ.get("PROJECT_DIR", "")
+# 1. base initialization
+os.makedirs(self.project_dir, exist_ok=True)
+self.logger = get_logger(name='pb_talker', file=os.path.join(self.project_dir, 'pb_talker.log'))
url = f"http://{os.environ.get('PB_API_BASE', '127.0.0.1:8090')}"
self.logger.debug(f"initializing pocketbase client: {url}")
self.client = PocketBase(url)

@@ -17,7 +21,7 @@ class PbTalker:
email, password = auth.split('|')
_ = self.client.admins.auth_with_password(email, password)
if _:
-self.logger.info(f"pocketbase ready authenticated as admin - {url}")
+self.logger.info(f"pocketbase ready authenticated as admin - {email}")
else:
raise Exception(f"pocketbase auth failed")

@@ -78,3 +82,6 @@ class PbTalker:
self.logger.error(f"pocketbase update failed: {e}")
return ''
return res.id
+
+
+pb = PbTalker()
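With the module-level singleton added above, callers import pb directly instead of constructing PbTalker with an injected logger. A minimal usage sketch, assuming a running PocketBase instance and the environment variables the class reads; the collection names and call signatures are the ones used elsewhere in this commit:

# Every module shares the same client; importing pb is enough.
from pb_api import pb

sites = pb.read('sites', filter='activated=True')
for site in sites:
    print(site['url'], site['per_hours'])

# add/update follow the same pattern used by the backend and worker code above
new_id = pb.add(collection_name='articles', body={'title': 'demo title', 'url': 'https://example.com/demo'})
if new_id:
    _ = pb.update(collection_name='articles', id=new_id, body={'abstract': 'demo abstract'})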
@@ -17,6 +17,7 @@ import os
header = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/604.1 Edg/112.0.100.0'}
project_dir = os.environ.get("PROJECT_DIR", "")
os.makedirs(project_dir, exist_ok=True)
logger = get_logger(name='general_scraper', file=os.path.join(project_dir, f'general_scraper.log'))

@@ -13,6 +13,7 @@ header = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/604.1 Edg/112.0.100.0'}

project_dir = os.environ.get("PROJECT_DIR", "")
os.makedirs(project_dir, exist_ok=True)
logger = get_logger(name='simple_crawler', file=os.path.join(project_dir, f'simple_crawler.log'))


@@ -26,11 +27,11 @@ def simple_crawler(url: str | Path) -> (int, dict):
rawdata = response.content
encoding = chardet.detect(rawdata)['encoding']
text = rawdata.decode(encoding)
-result = extractor.extract(text)
except Exception as e:
-logger.error(e)
+logger.warning(f"cannot get content from {url}\n{e}")
return -7, {}

+result = extractor.extract(text)
if not result:
logger.error(f"gne cannot extract {url}")
return 0, {}
@@ -5,7 +5,7 @@ from get_logger import get_logger
from datetime import datetime, timedelta, date
from scrapers import scraper_map
from scrapers.general_scraper import general_scraper
-from pb_api import PbTalker
+from pb_api import pb
from urllib.parse import urlparse
from get_insight import get_insight
from general_utils import is_chinese

@@ -27,7 +27,6 @@ class ServiceProcesser:
self.cache_url = os.path.join(self.project_dir, name)
os.makedirs(self.cache_url, exist_ok=True)
self.logger = get_logger(name=self.name, file=os.path.join(self.project_dir, f'{self.name}.log'))
-self.pb = PbTalker(self.logger)

# 2. load the llm
# self.llm = LocalLlmWrapper() # if you use the local-llm

@@ -49,7 +48,7 @@ class ServiceProcesser:
self.logger.debug(f'clear cache -- {cache}')
# 从pb数据库中读取所有文章url
# 这里publish_time用int格式,综合考虑下这个是最容易操作的模式,虽然糙了点
-existing_articles = self.pb.read(collection_name='articles', fields=['id', 'title', 'url'], filter=f'publish_time>{expiration_str}')
+existing_articles = pb.read(collection_name='articles', fields=['id', 'title', 'url'], filter=f'publish_time>{expiration_str}')
all_title = {}
existings = []
for article in existing_articles:

@@ -84,7 +83,7 @@ class ServiceProcesser:
value['content'] = f"({from_site} 报道){value['content']}"
value['images'] = json.dumps(value['images'])

-article_id = self.pb.add(collection_name='articles', body=value)
+article_id = pb.add(collection_name='articles', body=value)

if article_id:
cache[article_id] = value

@@ -104,13 +103,13 @@ class ServiceProcesser:
for insight in new_insights:
if not insight['content']:
continue
-insight_id = self.pb.add(collection_name='insights', body=insight)
+insight_id = pb.add(collection_name='insights', body=insight)
if not insight_id:
self.logger.warning(f'write insight {insight} to pb failed, writing to cache_file')
with open(os.path.join(self.cache_url, 'cache_insights.json'), 'a', encoding='utf-8') as f:
json.dump(insight, f, ensure_ascii=False, indent=4)
for article_id in insight['articles']:
-raw_article = self.pb.read(collection_name='articles', fields=['abstract', 'title', 'translation_result'], filter=f'id="{article_id}"')
+raw_article = pb.read(collection_name='articles', fields=['abstract', 'title', 'translation_result'], filter=f'id="{article_id}"')
if not raw_article or not raw_article[0]:
self.logger.warning(f'get article {article_id} failed, skipping')
continue

@@ -120,11 +119,11 @@ class ServiceProcesser:
continue
translate_text = text_translate([raw_article[0]['title'], raw_article[0]['abstract']], target_language='zh', logger=self.logger)
if translate_text:
-related_id = self.pb.add(collection_name='article_translation', body={'title': translate_text[0], 'abstract': translate_text[1], 'raw': article_id})
+related_id = pb.add(collection_name='article_translation', body={'title': translate_text[0], 'abstract': translate_text[1], 'raw': article_id})
if not related_id:
self.logger.warning(f'write article_translation {article_id} failed')
else:
-_ = self.pb.update(collection_name='articles', id=article_id, body={'translation_result': related_id})
+_ = pb.update(collection_name='articles', id=article_id, body={'translation_result': related_id})
if not _:
self.logger.warning(f'update article {article_id} failed')
else:

@@ -140,8 +139,7 @@ class ServiceProcesser:
else:
text_for_insight = text_translate([value['title']], logger=self.logger)
if text_for_insight:
-insight_id = self.pb.add(collection_name='insights',
-body={'content': text_for_insight[0], 'articles': [key]})
+insight_id = pb.add(collection_name='insights', body={'content': text_for_insight[0], 'articles': [key]})
if not insight_id:
self.logger.warning(f'write insight {text_for_insight[0]} to pb failed, writing to cache_file')
with open(os.path.join(self.cache_url, 'cache_insights.json'), 'a',

@@ -158,7 +156,7 @@ class ServiceProcesser:
try:
snapshot = requests.get(f"{self.snap_short_server}/zip", {'url': value['url']}, timeout=60)
file = open(snapshot.text, 'rb')
-_ = self.pb.upload('articles', key, 'snapshot', key, file)
+_ = pb.upload('articles', key, 'snapshot', key, file)
file.close()
except Exception as e:
self.logger.warning(f'error when snapshot {value["url"]}, {e}')
@@ -1,9 +0,0 @@
-; config.ini
-[prompts]
-character = 来自中国的网络安全情报专家
-focus = 中国关注的网络安全新闻
-focus_type = 网络安全新闻
-good_sample1 = 黑客组织Rhysida声称已入侵中国国有能源公司
-good_sample2 = 差不多一百万份包含未成年人数据(包括家庭地址和照片)的文件对互联网上的任何人都开放,对孩子构成威胁
-bad_sample = 黑客组织活动最近频发
-report_type = 网络安全情报
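The keys deleted from config.ini map one-to-one onto fields of the new roleplays collection. A hypothetical one-off seeding script (not part of this commit) could push the old defaults into PocketBase so the interactive input() prompts are skipped on first run:

# Hypothetical seeding script: copy the former config.ini values into the roleplays collection.
from pb_api import pb

_ = pb.add(collection_name='roleplays', body={
    'activated': True,
    'character': '来自中国的网络安全情报专家',
    'focus': '中国关注的网络安全新闻',
    'focus_type': '网络安全新闻',
    'good_sample1': '黑客组织Rhysida声称已入侵中国国有能源公司',
    'good_sample2': '差不多一百万份包含未成年人数据(包括家庭地址和照片)的文件对互联网上的任何人都开放,对孩子构成威胁',
    'bad_sample': '黑客组织活动最近频发',
    'report_type': '网络安全情报',
})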
client/pb/pb_migrations/1713321985_created_roleplays.js (new file, 135 lines)
@@ -0,0 +1,135 @@
/// <reference path="../pb_data/types.d.ts" />
migrate((db) => {
const collection = new Collection({
"id": "4rpge043645sp4j",
"created": "2024-04-17 02:46:25.373Z",
"updated": "2024-04-17 02:46:25.373Z",
"name": "roleplays",
"type": "base",
"system": false,
"schema": [
{
"system": false,
"id": "ixk4pwsb",
"name": "activated",
"type": "bool",
"required": false,
"presentable": false,
"unique": false,
"options": {}
},
{
"system": false,
"id": "tmak73c7",
"name": "character",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
},
{
"system": false,
"id": "6iuxuwhb",
"name": "focus",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
},
{
"system": false,
"id": "axmc2huy",
"name": "focus_type",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
},
{
"system": false,
"id": "gop61pjt",
"name": "good_sample1",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
},
{
"system": false,
"id": "qmy5cofa",
"name": "good_sample2",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
},
{
"system": false,
"id": "h8gafaci",
"name": "bad_sample",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
},
{
"system": false,
"id": "m2ug5sfd",
"name": "report_type",
"type": "text",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": null,
"max": null,
"pattern": ""
}
}
],
"indexes": [],
"listRule": null,
"viewRule": null,
"createRule": null,
"updateRule": null,
"deleteRule": null,
"options": {}
});

return Dao(db).saveCollection(collection);
}, (db) => {
const dao = new Dao(db);
const collection = dao.findCollectionByNameOrId("4rpge043645sp4j");

return dao.deleteCollection(collection);
})
client/pb/pb_migrations/1713322324_created_sites.js (new file, 54 lines)
@@ -0,0 +1,54 @@
/// <reference path="../pb_data/types.d.ts" />
migrate((db) => {
const collection = new Collection({
"id": "sma08jpi5rkoxnh",
"created": "2024-04-17 02:52:04.291Z",
"updated": "2024-04-17 02:52:04.291Z",
"name": "sites",
"type": "base",
"system": false,
"schema": [
{
"system": false,
"id": "6qo4l7og",
"name": "url",
"type": "url",
"required": false,
"presentable": false,
"unique": false,
"options": {
"exceptDomains": null,
"onlyDomains": null
}
},
{
"system": false,
"id": "lgr1quwi",
"name": "per_hours",
"type": "number",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": 1,
"max": 24,
"noDecimal": false
}
}
],
"indexes": [],
"listRule": null,
"viewRule": null,
"createRule": null,
"updateRule": null,
"deleteRule": null,
"options": {}
});

return Dao(db).saveCollection(collection);
}, (db) => {
const dao = new Dao(db);
const collection = dao.findCollectionByNameOrId("sma08jpi5rkoxnh");

return dao.deleteCollection(collection);
})
client/pb/pb_migrations/1713328405_updated_sites.js (new file, 74 lines)
@@ -0,0 +1,74 @@
/// <reference path="../pb_data/types.d.ts" />
migrate((db) => {
const dao = new Dao(db)
const collection = dao.findCollectionByNameOrId("sma08jpi5rkoxnh")

// update
collection.schema.addField(new SchemaField({
"system": false,
"id": "6qo4l7og",
"name": "url",
"type": "url",
"required": true,
"presentable": false,
"unique": false,
"options": {
"exceptDomains": null,
"onlyDomains": null
}
}))

// update
collection.schema.addField(new SchemaField({
"system": false,
"id": "lgr1quwi",
"name": "per_hours",
"type": "number",
"required": true,
"presentable": false,
"unique": false,
"options": {
"min": 1,
"max": 24,
"noDecimal": false
}
}))

return dao.saveCollection(collection)
}, (db) => {
const dao = new Dao(db)
const collection = dao.findCollectionByNameOrId("sma08jpi5rkoxnh")

// update
collection.schema.addField(new SchemaField({
"system": false,
"id": "6qo4l7og",
"name": "url",
"type": "url",
"required": false,
"presentable": false,
"unique": false,
"options": {
"exceptDomains": null,
"onlyDomains": null
}
}))

// update
collection.schema.addField(new SchemaField({
"system": false,
"id": "lgr1quwi",
"name": "per_hours",
"type": "number",
"required": false,
"presentable": false,
"unique": false,
"options": {
"min": 1,
"max": 24,
"noDecimal": false
}
}))

return dao.saveCollection(collection)
})
client/pb/pb_migrations/1713329959_updated_sites.js (new file, 27 lines)
@@ -0,0 +1,27 @@
/// <reference path="../pb_data/types.d.ts" />
migrate((db) => {
const dao = new Dao(db)
const collection = dao.findCollectionByNameOrId("sma08jpi5rkoxnh")

// add
collection.schema.addField(new SchemaField({
"system": false,
"id": "8x8n2a47",
"name": "activated",
"type": "bool",
"required": false,
"presentable": false,
"unique": false,
"options": {}
}))

return dao.saveCollection(collection)
}, (db) => {
const dao = new Dao(db)
const collection = dao.findCollectionByNameOrId("sma08jpi5rkoxnh")

// remove
collection.schema.removeField("8x8n2a47")

return dao.saveCollection(collection)
})
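After these migrations the hourly task reads its crawl targets from the sites collection rather than sites.txt. An illustrative sketch of registering a source (the URL is a placeholder; per_hours must fall in the 1-24 range defined by the schema above):

# Illustrative: register one crawl source; activated gates the filter used by task().
from pb_api import pb

site_id = pb.add(collection_name='sites', body={
    'url': 'https://www.example-news-site.com',   # placeholder URL
    'per_hours': 6,                                # crawled every 6th hourly tick
    'activated': True,
})
print('created site record:', site_id)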