2024-06-15 20:04:10 +08:00
|
|
|
import asyncio
|
2024-12-09 18:18:10 +08:00
|
|
|
from general_process import crawler, pb, wiseflow_logger
|
2024-06-13 21:08:58 +08:00
|
|
|
|
2024-06-21 10:05:33 +08:00
|
|
|
# Number of the current scheduling cycle, starting at 1. schedule_pipeline()
# increments it once per loop iteration and tests it modulo each site's
# 'per_hours' value to decide which sites are due in that cycle.
counter = 1
|
2024-06-13 21:08:58 +08:00
|
|
|
|
|
|
|
|
2024-06-15 20:04:10 +08:00
|
|
|
async def schedule_pipeline(interval):
    """Run the crawl scheduling loop forever.

    Each cycle:
      1. logs the current cycle number (the module-level ``counter``);
      2. reads the 'sites' records matching ``activated=True`` through ``pb``;
      3. gathers the URLs of every site whose ``per_hours`` evenly divides
         the cycle number, with trailing slashes stripped and duplicates
         collapsed;
      4. advances ``counter`` and hands the gathered URLs to ``crawler.run``;
      5. sleeps for ``interval`` seconds before starting the next cycle.

    Args:
        interval: Pause between two cycles, in seconds.

    This coroutine never returns on its own.
    """
    global counter

    while True:
        wiseflow_logger.info(f'task execute loop {counter}')

        due_urls = set()
        for record in pb.read('sites', filter='activated=True'):
            period = record['per_hours']
            if not period:
                # No schedule configured for this site.
                continue
            url = record['url']
            if not url:
                # Nothing to crawl without an address.
                continue
            if counter % period != 0:
                # Not this site's turn in the current cycle.
                continue
            wiseflow_logger.info(f"applying {url}")
            due_urls.add(url.rstrip('/'))

        # The counter is advanced before crawling, so it moves forward even
        # if the crawl below raises.
        counter += 1
        await crawler.run(list(due_urls))

        wiseflow_logger.info(f'task execute loop finished, work after {interval} seconds')
        await asyncio.sleep(interval)
|
2024-06-13 21:08:58 +08:00
|
|
|
|
|
|
|
|
2024-06-15 20:04:10 +08:00
|
|
|
async def main():
    """Start the scheduling loop with a one-hour pause between cycles."""
    hours_between_cycles = 1
    seconds_per_hour = 60 * 60
    # schedule_pipeline expects its interval in seconds.
    await schedule_pipeline(hours_between_cycles * seconds_per_hour)
|
2024-06-13 21:08:58 +08:00
|
|
|
|
2024-06-15 20:04:10 +08:00
|
|
|
# Script entry point: executes as soon as this module is loaded, running
# main() on a new event loop and blocking until main() completes.
asyncio.run(main())
|