wiseflow/core/tasks.py

32 lines
935 B
Python
Raw Normal View History

2024-06-15 20:04:10 +08:00
import asyncio
2024-12-09 18:18:10 +08:00
from general_process import crawler, pb, wiseflow_logger
2024-06-13 21:08:58 +08:00
2024-06-21 10:05:33 +08:00
# Number of the current scheduling pass, starting at 1. schedule_pipeline logs it,
# tests it against each site's 'per_hours' value, and increments it once per pass.
counter = 1
2024-06-13 21:08:58 +08:00
2024-06-15 20:04:10 +08:00
async def schedule_pipeline(interval):
    """Run the crawl scheduling loop forever.

    On every pass the records of the 'sites' collection matching the
    'activated=True' filter are read. A site is due when both its
    'per_hours' and 'url' fields are truthy and 'per_hours' evenly divides
    the module-level ``counter``. Due URLs are collected with trailing
    slashes stripped (duplicates collapse in the set), ``counter`` is
    advanced, the URLs are handed to ``crawler.run``, and the coroutine
    then sleeps before starting the next pass.

    Args:
        interval: Pause between two passes, in seconds.
    """
    global counter
    while True:
        wiseflow_logger.info(f'task execute loop {counter}')

        due_urls = set()
        for entry in pb.read('sites', filter='activated=True'):
            # Same short-circuit order as a two-step guard: per_hours first,
            # then url, and only then the modulo check.
            is_due = (
                entry['per_hours']
                and entry['url']
                and counter % entry['per_hours'] == 0
            )
            if not is_due:
                continue
            wiseflow_logger.info(f"applying {entry['url']}")
            due_urls.add(entry['url'].rstrip('/'))

        # The counter moves on before the crawl is awaited.
        counter += 1
        await crawler.run(list(due_urls))
        wiseflow_logger.info(f'task execute loop finished, work after {interval} seconds')
        await asyncio.sleep(interval)
2024-06-13 21:08:58 +08:00
2024-06-15 20:04:10 +08:00
async def main():
    """Start the scheduling loop with a one-hour pause between passes."""
    seconds_per_hour = 60 * 60
    hours_between_passes = 1
    await schedule_pipeline(hours_between_passes * seconds_per_hour)
2024-06-13 21:08:58 +08:00
2024-06-15 20:04:10 +08:00
# Script entry point: run main() to completion via asyncio.run.
# NOTE(review): there is no `if __name__ == "__main__"` guard, so this call also
# executes whenever the module is imported — confirm that is intended.
asyncio.run(main())