wiseflow/dashboard/general_utils.py
2024-06-13 21:08:58 +08:00

66 lines
2.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from urllib.parse import urlparse
import os
import re
def isURL(string):
    """Return True when *string* parses as a URL with a scheme and a network location.

    :param string: candidate URL text
    :return: {bool} True if ``urlparse`` yields both a non-empty scheme and a
        non-empty netloc; False otherwise, including when the text is too
        malformed for ``urlparse`` to handle
    """
    try:
        result = urlparse(string)
    except ValueError:
        # urlparse raises ValueError on malformed input such as an unbalanced
        # IPv6 bracket ("http://[::1"); for a predicate that means "not a URL".
        return False
    return result.scheme != '' and result.netloc != ''
def isChinesePunctuation(char):
    """Return True when *char* falls in a Unicode block used for Chinese punctuation.

    Two blocks are checked, both ends inclusive:
    U+3000-U+303F (CJK Symbols and Punctuation) and
    U+FF00-U+FFEF (Halfwidth and Fullwidth Forms). The check is block-based,
    so every code point in these blocks matches, including fullwidth letters
    and digits, not only punctuation marks.

    :param char: {str} a single character
    :return: {bool} True if the character's code point is inside either block
    :raises TypeError: if *char* is not a single character (raised by ``ord``)
    """
    code_point = ord(char)
    # Plain comparisons keep both block endpoints inclusive and avoid building
    # a set of code points on every call.
    return 0x3000 <= code_point <= 0x303F or 0xFF00 <= code_point <= 0xFFEF
def is_chinese(string):
    """
    Decide whether most of a string is Chinese.

    A character counts as Chinese when it lies in U+4E00-U+9FA5; everything
    else (Latin letters, digits, whitespace, punctuation) counts as non-Chinese.

    NOTE: Volcengine's language detection would support a much wider range of
    languages and may be worth adopting later:
    https://www.volcengine.com/docs/4640/65066

    :param string: {str} the text to inspect
    :return: {bool} True if the share of non-Chinese characters is below 0.68;
        False otherwise, and False for empty input
    """
    if not string:
        # Nothing to measure; also avoids dividing by a zero length below.
        return False
    pattern = re.compile(r'[^\u4e00-\u9fa5]')
    non_chinese_count = len(pattern.findall(string))
    # A strict "less than half" cut-off misjudges easily: English words and
    # punctuation take up many characters, so the threshold is relaxed to 0.68.
    return (non_chinese_count / len(string)) < 0.68
def extract_and_convert_dates(input_string):
    """Find the first date in *input_string* and return it as ``YYYYMMDD``.

    The formats are tried in a fixed priority order, and the first format that
    occurs anywhere in the text wins, even if a lower-priority format appears
    earlier in the string. Within the winning format the leftmost occurrence
    is used. Only the digit layout is checked; the digits are not validated
    as a real calendar date.

    :param input_string: {str} text that may contain a date
    :return: {str | None} the eight digits ``YYYYMMDD``, or None when no
        pattern matches or the input is not a string
    """
    if not isinstance(input_string, str):
        # Callers may pass None or another non-string for a missing field;
        # report "no date" rather than failing inside the regex engine.
        return None

    # Regular expressions for the supported date formats, in priority order.
    patterns = (
        r'(\d{4})-(\d{2})-(\d{2})',  # YYYY-MM-DD
        r'(\d{4})/(\d{2})/(\d{2})',  # YYYY/MM/DD
        r'(\d{4})\.(\d{2})\.(\d{2})',  # YYYY.MM.DD
        r'(\d{4})\\(\d{2})\\(\d{2})',  # YYYY\MM\DD
        r'(\d{4})(\d{2})(\d{2})',  # YYYYMMDD
    )
    for pattern in patterns:
        # search() stops at the leftmost match, which is the only one needed.
        match = re.search(pattern, input_string)
        if match:
            return ''.join(match.groups())
    return None
def get_logger_level() -> str:
    """Translate the ``WS_LOG`` environment variable into a logging level name.

    ``WS_LOG`` is read case-insensitively and defaults to ``info`` when unset.

    :return: one of ``CRITICAL``, ``DEBUG``, ``INFO``, ``WARNING``, ``ERROR``
    :raises ValueError: if ``WS_LOG`` holds a value outside the supported set
    """
    requested = os.environ.get('WS_LOG', 'info').lower()

    # NOTE(review): 'silly' resolves to CRITICAL, the least verbose level;
    # confirm this mapping is intended.
    names_by_setting = {
        'silly': 'CRITICAL',
        'verbose': 'DEBUG',
        'info': 'INFO',
        'warn': 'WARNING',
        'error': 'ERROR',
    }

    resolved = names_by_setting.get(requested)
    if resolved is None:
        raise ValueError(
            'WiseFlow LOG should support the values of `silly`, '
            '`verbose`, `info`, `warn`, `error`'
        )
    return resolved