mirror of
https://github.com/TeamWiseFlow/wiseflow.git
synced 2025-02-02 18:28:46 +08:00
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
from dataclasses import dataclass
|
|
from typing import List, Optional
|
|
from datetime import datetime
|
|
|
|
@dataclass
class ScraperResultData:
    """Data class holding the data extracted from a scraped web page.

    Every field is optional at construction time. After construction,
    ``__post_init__`` normalizes the instance:

    * ``content``, ``title`` and ``author`` become ``""`` when left as ``None``;
    * ``images`` becomes a fresh empty list when left as ``None``;
    * ``publish_date`` is converted to a string when it is not one already
      (``datetime`` values use ISO 8601 via ``isoformat()``);
    * ``base`` is stored exactly as given.
    """

    # url: str
    content: Optional[str] = None
    # links: Optional[Dict[str, str]] = None
    images: Optional[List[str]] = None
    author: Optional[str] = None
    publish_date: Optional[str] = None
    title: Optional[str] = None
    base: Optional[str] = None

    def __post_init__(self):
        """Fill in defaults for omitted fields and normalize field types.

        Raises:
            ValueError: if ``images`` was given but is not a ``list``.
        """
        # Replace omitted optional fields with empty values so that callers
        # never have to deal with None for these attributes.
        if self.images is None:
            self.images = []
        if self.title is None:
            self.title = ""
        if self.author is None:
            self.author = ""
        if self.content is None:
            self.content = ""

        # Make sure publish_date, when present, is stored as a string.
        published = self.publish_date
        if isinstance(published, datetime):
            self.publish_date = published.isoformat()
        elif published is not None and not isinstance(published, str):
            self.publish_date = str(published)

        # images can no longer be None here (defaulted above), so only the
        # type needs checking.
        if not isinstance(self.images, list):
            raise ValueError("images 必须是列表类型")