mirror of
https://github.com/TeamWiseFlow/wiseflow.git
synced 2025-01-23 02:20:20 +08:00
feat(core): update pb data sheet structure
This commit is contained in:
parent
3e4454a33b
commit
8c64749ba7
@ -23,7 +23,7 @@ class GeneralInfoExtractor:
|
||||
focus = input('It seems you have not set any focus point, WiseFlow need the specific focus point to guide the following info extract job.'
|
||||
'so please input one now. describe what info you care about shortly: ')
|
||||
explanation = input('Please provide more explanation for the focus point (if not necessary, pls just type enter: ')
|
||||
focus_data.append({"name": focus, "explaination": explanation,
|
||||
focus_data.append({"name": focus, "explanation": explanation,
|
||||
"id": pb.add('focus_points', {"focuspoint": focus, "explanation": explanation})})
|
||||
|
||||
# self.focus_list = [item["focuspoint"] for item in focus_data]
|
||||
|
@ -23,10 +23,6 @@ wiseflow_logger = get_logger('general_process', f'{project_dir}/general_process.
|
||||
pb = PbTalker(wiseflow_logger)
|
||||
gie = GeneralInfoExtractor(pb, wiseflow_logger)
|
||||
|
||||
# Global variables
|
||||
working_list = set()
|
||||
existing_urls = {url['url'] for url in pb.read(collection_name='articles', fields=['url']) if url['url']}
|
||||
lock = asyncio.Lock()
|
||||
|
||||
async def save_to_pb(article: dict, infos: list):
|
||||
# saving to pb process
|
||||
@ -57,7 +53,9 @@ async def save_to_pb(article: dict, infos: list):
|
||||
|
||||
|
||||
async def pipeline(url: str):
|
||||
global working_list, existing_urls
|
||||
working_list = set()
|
||||
existing_urls = {url['url'] for url in pb.read(collection_name='articles', fields=['url']) if url['url']}
|
||||
lock = asyncio.Lock()
|
||||
working_list.add(url)
|
||||
crawler = PlaywrightCrawler(
|
||||
# Limit the crawl to max requests. Remove or increase it for crawling all links.
|
||||
@ -179,6 +177,9 @@ async def pipeline(url: str):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import asyncio
|
||||
sites = pb.read('sites', filter='activated=True')
|
||||
wiseflow_logger.info('execute all sites one time')
|
||||
async def run_all_sites():
|
||||
await asyncio.gather(*[pipeline(site['url'].rstrip('/')) for site in sites])
|
||||
|
||||
asyncio.run(pipeline())
|
||||
asyncio.run(run_all_sites())
|
||||
|
89
core/pb/pb_migrations/1733465276_created_sites.js
Normal file
89
core/pb/pb_migrations/1733465276_created_sites.js
Normal file
@ -0,0 +1,89 @@
|
||||
/// <reference path="../pb_data/types.d.ts" />
|
||||
migrate((app) => {
|
||||
const collection = new Collection({
|
||||
"createRule": null,
|
||||
"deleteRule": null,
|
||||
"fields": [
|
||||
{
|
||||
"autogeneratePattern": "[a-z0-9]{15}",
|
||||
"hidden": false,
|
||||
"id": "text3208210256",
|
||||
"max": 15,
|
||||
"min": 15,
|
||||
"name": "id",
|
||||
"pattern": "^[a-z0-9]+$",
|
||||
"presentable": false,
|
||||
"primaryKey": true,
|
||||
"required": true,
|
||||
"system": true,
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"exceptDomains": [],
|
||||
"hidden": false,
|
||||
"id": "url4101391790",
|
||||
"name": "url",
|
||||
"onlyDomains": [],
|
||||
"presentable": false,
|
||||
"required": true,
|
||||
"system": false,
|
||||
"type": "url"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "number1152796692",
|
||||
"max": null,
|
||||
"min": null,
|
||||
"name": "per_hours",
|
||||
"onlyInt": false,
|
||||
"presentable": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "bool806155165",
|
||||
"name": "activated",
|
||||
"presentable": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "bool"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "autodate2990389176",
|
||||
"name": "created",
|
||||
"onCreate": true,
|
||||
"onUpdate": false,
|
||||
"presentable": false,
|
||||
"system": false,
|
||||
"type": "autodate"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "autodate3332085495",
|
||||
"name": "updated",
|
||||
"onCreate": true,
|
||||
"onUpdate": true,
|
||||
"presentable": false,
|
||||
"system": false,
|
||||
"type": "autodate"
|
||||
}
|
||||
],
|
||||
"id": "pbc_2001081480",
|
||||
"indexes": [],
|
||||
"listRule": null,
|
||||
"name": "sites",
|
||||
"system": false,
|
||||
"type": "base",
|
||||
"updateRule": null,
|
||||
"viewRule": null
|
||||
});
|
||||
|
||||
return app.save(collection);
|
||||
}, (app) => {
|
||||
const collection = app.findCollectionByNameOrId("pbc_2001081480");
|
||||
|
||||
return app.delete(collection);
|
||||
})
|
28
core/pb/pb_migrations/1733465426_updated_articles.js
Normal file
28
core/pb/pb_migrations/1733465426_updated_articles.js
Normal file
@ -0,0 +1,28 @@
|
||||
/// <reference path="../pb_data/types.d.ts" />
|
||||
migrate((app) => {
|
||||
const collection = app.findCollectionByNameOrId("pbc_4287850865")
|
||||
|
||||
// add field
|
||||
collection.fields.addAt(6, new Field({
|
||||
"cascadeDelete": false,
|
||||
"collectionId": "pbc_3385864241",
|
||||
"hidden": false,
|
||||
"id": "relation1874629670",
|
||||
"maxSelect": 999,
|
||||
"minSelect": 0,
|
||||
"name": "tags",
|
||||
"presentable": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "relation"
|
||||
}))
|
||||
|
||||
return app.save(collection)
|
||||
}, (app) => {
|
||||
const collection = app.findCollectionByNameOrId("pbc_4287850865")
|
||||
|
||||
// remove field
|
||||
collection.fields.removeById("relation1874629670")
|
||||
|
||||
return app.save(collection)
|
||||
})
|
111
core/pb/pb_migrations/1733465563_created_infos.js
Normal file
111
core/pb/pb_migrations/1733465563_created_infos.js
Normal file
@ -0,0 +1,111 @@
|
||||
/// <reference path="../pb_data/types.d.ts" />
|
||||
migrate((app) => {
|
||||
const collection = new Collection({
|
||||
"createRule": null,
|
||||
"deleteRule": null,
|
||||
"fields": [
|
||||
{
|
||||
"autogeneratePattern": "[a-z0-9]{15}",
|
||||
"hidden": false,
|
||||
"id": "text3208210256",
|
||||
"max": 15,
|
||||
"min": 15,
|
||||
"name": "id",
|
||||
"pattern": "^[a-z0-9]+$",
|
||||
"presentable": false,
|
||||
"primaryKey": true,
|
||||
"required": true,
|
||||
"system": true,
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"autogeneratePattern": "",
|
||||
"hidden": false,
|
||||
"id": "text4274335913",
|
||||
"max": 0,
|
||||
"min": 0,
|
||||
"name": "content",
|
||||
"pattern": "",
|
||||
"presentable": false,
|
||||
"primaryKey": false,
|
||||
"required": true,
|
||||
"system": false,
|
||||
"type": "text"
|
||||
},
|
||||
{
|
||||
"cascadeDelete": false,
|
||||
"collectionId": "pbc_3385864241",
|
||||
"hidden": false,
|
||||
"id": "relation59357059",
|
||||
"maxSelect": 1,
|
||||
"minSelect": 0,
|
||||
"name": "tag",
|
||||
"presentable": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "relation"
|
||||
},
|
||||
{
|
||||
"cascadeDelete": false,
|
||||
"collectionId": "pbc_4287850865",
|
||||
"hidden": false,
|
||||
"id": "relation3218944360",
|
||||
"maxSelect": 999,
|
||||
"minSelect": 0,
|
||||
"name": "articles",
|
||||
"presentable": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"type": "relation"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "file3291445124",
|
||||
"maxSelect": 1,
|
||||
"maxSize": 0,
|
||||
"mimeTypes": [],
|
||||
"name": "report",
|
||||
"presentable": false,
|
||||
"protected": false,
|
||||
"required": false,
|
||||
"system": false,
|
||||
"thumbs": [],
|
||||
"type": "file"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "autodate2990389176",
|
||||
"name": "created",
|
||||
"onCreate": true,
|
||||
"onUpdate": false,
|
||||
"presentable": false,
|
||||
"system": false,
|
||||
"type": "autodate"
|
||||
},
|
||||
{
|
||||
"hidden": false,
|
||||
"id": "autodate3332085495",
|
||||
"name": "updated",
|
||||
"onCreate": true,
|
||||
"onUpdate": true,
|
||||
"presentable": false,
|
||||
"system": false,
|
||||
"type": "autodate"
|
||||
}
|
||||
],
|
||||
"id": "pbc_629947526",
|
||||
"indexes": [],
|
||||
"listRule": null,
|
||||
"name": "infos",
|
||||
"system": false,
|
||||
"type": "base",
|
||||
"updateRule": null,
|
||||
"viewRule": null
|
||||
});
|
||||
|
||||
return app.save(collection);
|
||||
}, (app) => {
|
||||
const collection = app.findCollectionByNameOrId("pbc_629947526");
|
||||
|
||||
return app.delete(collection);
|
||||
})
|
@ -1,5 +1,5 @@
|
||||
import asyncio
|
||||
from agents import pipeline, pb, logger
|
||||
from general_process import pipeline, pb, wiseflow_logger
|
||||
|
||||
counter = 1
|
||||
|
||||
@ -8,7 +8,7 @@ async def process_site(site, counter):
|
||||
if not site['per_hours'] or not site['url']:
|
||||
return
|
||||
if counter % site['per_hours'] == 0:
|
||||
logger.info(f"applying {site['url']}")
|
||||
wiseflow_logger.info(f"applying {site['url']}")
|
||||
await pipeline(site['url'].rstrip('/'))
|
||||
|
||||
|
||||
@ -16,11 +16,11 @@ async def schedule_pipeline(interval):
|
||||
global counter
|
||||
while True:
|
||||
sites = pb.read('sites', filter='activated=True')
|
||||
logger.info(f'task execute loop {counter}')
|
||||
wiseflow_logger.info(f'task execute loop {counter}')
|
||||
await asyncio.gather(*[process_site(site, counter) for site in sites])
|
||||
|
||||
counter += 1
|
||||
logger.info(f'task execute loop finished, work after {interval} seconds')
|
||||
wiseflow_logger.info(f'task execute loop finished, work after {interval} seconds')
|
||||
await asyncio.sleep(interval)
|
||||
|
||||
|
||||
|
16
env_sample
16
env_sample
@ -1,10 +1,10 @@
|
||||
export LLM_API_KEY=""
|
||||
export LLM_API_BASE="https://api.siliconflow.cn/v1" ##for local model services or calling non-OpenAI services with openai_wrapper
|
||||
##strongly recommended to use the following model provided by siliconflow (consider both effect and price)
|
||||
export GET_INFO_MODEL="THUDM/glm-4-9b-chat" ##
|
||||
export REWRITE_MODEL="Qwen/Qwen2-7B-Instruct"
|
||||
export HTML_PARSE_MODEL="aQwen/Qwen2-7B-Instruct"
|
||||
export LLM_API_BASE="https://api.siliconflow.cn/v1"
|
||||
export PB_API_AUTH="test@example.com|1234567890" ##your pb superuser account and password
|
||||
export VERBOSE="true" ##for detailed log info. If not needed, remove this item.
|
||||
|
||||
##the following settings are optional; set them as needed
|
||||
#export PRIMARY_MODEL="Qwen/Qwen2.5-14B-Instruct"
|
||||
#export SECONDARY_MODEL="THUDM/glm-4-9b-chat"
|
||||
export PROJECT_DIR="work_dir"
|
||||
export PB_API_AUTH="test@example.com|1234567890"
|
||||
# export "PB_API_BASE"="" ##only use if your pb not run on 127.0.0.1:8090
|
||||
export WS_LOG="verbose" ##for detail log info. If not need, just delete this item.
|
||||
#export "PB_API_BASE"="" ##only needed if your pb is not running on 127.0.0.1:8090
|
Loading…
Reference in New Issue
Block a user