mirror of
https://github.com/TeamWiseFlow/wiseflow.git
synced 2025-01-23 10:50:25 +08:00
121 lines
4.3 KiB
Python
121 lines
4.3 KiB
Python
|
# 使用火山引擎进行翻译的接口封装
# 通过环境变量设置VOLC_KEY,格式为AK|SK
# AK-SK 需要手机号注册并实名认证,具体见这里https://console.volcengine.com/iam/keymanage/ (自助接入)
# 费用:每月免费额度200万字符(1个汉字、1个外语字母、1个数字、1个符号或空格都计为一个字符),超出后49元/每百万字符
# 图片翻译:每月免费100张,超出后0.04元/张
# 文本翻译并发限制,每个batch最多16个,总文本长度不超过5000字符,max QPS为10
# 术语库管理:https://console.volcengine.com/translate
|
|||
|
|
|||
|
|
|||
|
import json
|
|||
|
import time
|
|||
|
import os
|
|||
|
from volcengine.ApiInfo import ApiInfo
|
|||
|
from volcengine.Credentials import Credentials
|
|||
|
from volcengine.ServiceInfo import ServiceInfo
|
|||
|
from volcengine.base.Service import Service
|
|||
|
|
|||
|
|
|||
|
# Credentials are read once, at import time, from the VOLC_KEY environment
# variable, which must hold the access key and secret key joined by a single
# '|' character ("AK|SK").
VOLC_KEY = os.environ.get('VOLC_KEY', None)
if not VOLC_KEY:
    raise Exception('请设置环境变量 VOLC_KEY,格式为AK|SK')

# Validate the shape explicitly so that a malformed value produces an
# actionable message instead of a bare tuple-unpacking error. The exception
# type (ValueError) is the same one the unpacking would have raised, and the
# key material itself is deliberately kept out of the message.
_volc_key_parts = VOLC_KEY.split('|')
if len(_volc_key_parts) != 2:
    raise ValueError(
        "VOLC_KEY must have the form 'AK|SK' with exactly one '|' separator, "
        f"but it splits into {len(_volc_key_parts)} part(s)"
    )
k_access_key, k_secret_key = _volc_key_parts
|
|||
|
|
|||
|
|
|||
|
def text_translate(texts: list[str], target_language: str = 'zh', source_language: str = '', logger=None) -> list[str]:
    """Translate a batch of texts through the Volcengine ``TranslateText`` API.

    The request is attempted up to three times. An error whose code is in the
    non-retryable list aborts immediately; any other error is retried after a
    one-second pause.

    Args:
        texts: Strings to translate; all of them are sent in one request.
            This function does not split the list into smaller batches.
        target_language: Language code to translate into.
        source_language: Language code of ``texts``. When empty, the
            ``SourceLanguage`` field is omitted from the request body.
        logger: Optional object exposing ``debug`` and ``warning``. When it is
            falsy, warnings are written with ``print`` and debug output is
            skipped.

    Returns:
        The ``Translation`` value of every entry in the response's
        ``TranslationList``, or an empty list when ``texts`` is empty, when a
        non-retryable error is reported, or when all attempts fail.

    Exceptions raised by the service call or by JSON decoding are not caught
    here and propagate to the caller.
    """
    if not texts:
        # Nothing to translate: avoid a pointless network round-trip.
        return []

    def _warn(message: str) -> None:
        # Send warnings to the caller's logger when given, otherwise stdout.
        if logger:
            logger.warning(message)
        else:
            print(message)

    # NOTE(review): the two trailing 5s are presumably the connection and
    # socket timeouts in seconds - confirm against the volcengine SDK.
    k_service_info = ServiceInfo(
        'translate.volcengineapi.com',
        {'Content-Type': 'application/json'},
        Credentials(k_access_key, k_secret_key, 'translate', 'cn-north-1'),
        5,
        5,
    )
    k_query = {
        'Action': 'TranslateText',
        'Version': '2020-06-01',
    }
    k_api_info = {
        'translate': ApiInfo('POST', '/', k_query, {}, {}),
    }
    service = Service(k_service_info, k_api_info)

    # Always honour the requested target language. Previously the branch
    # without a source language hard-coded 'zh' and ignored the argument.
    body = {
        'TargetLanguage': target_language,
        'TextList': texts,
    }
    if source_language:
        body['SourceLanguage'] = source_language

    if logger:
        logger.debug(f'post body:\n {body}')

    # Error codes for which a retry cannot help.
    # NOTE(review): '1000XX' is compared literally, so it only matches a code
    # that is exactly that string - confirm whether a prefix match on 1000xx
    # codes was intended.
    non_retryable_codes = ['-400', '-415', '1000XX']
    max_attempts = 3

    for attempt in range(max_attempts):
        res = service.json('translate', {}, json.dumps(body))
        result = json.loads(res)

        if logger:
            logger.debug(f'result:\n {result}')

        metadata = result["ResponseMetadata"]
        if "Error" not in metadata:
            return [item["Translation"] for item in result["TranslationList"]]

        error = metadata["Error"]
        if error["Code"] in non_retryable_codes:
            _warn(f"translation failed cause: {error['Message']}")
            return []

        _warn(f"translation failed cause: {error['Message']}\n retry...")
        # Pause between attempts only; sleeping after the last failure would
        # just delay the empty result.
        if attempt < max_attempts - 1:
            time.sleep(1)

    _warn("translation service out of use, have retried 3 times...")
    return []
|
|||
|
|
|||
|
|
|||
|
if __name__ == '__main__':
    # Command-line entry point: translate either one --text string or every
    # non-blank line of a --file, then print the translations and the
    # elapsed wall-clock time.
    import argparse
    from pprint import pprint

    parser = argparse.ArgumentParser(description='argparse')
    parser.add_argument("--file", "-F", type=str, default=None,
                        help="path to a .txt file, one text per line")
    parser.add_argument('--text', "-T", type=str, default="",
                        help="text to translate")
    parser.add_argument('--source', type=str, default="",
                        help="source language")
    parser.add_argument('--target', type=str, default='zh',
                        help="target language, default zh")

    args = parser.parse_args()

    if args.file:
        # --file takes precedence over --text when both are supplied.
        if not os.path.exists(args.file):
            raise FileNotFoundError("File {} not found".format(args.file))
        if not args.file.endswith(".txt"):
            raise ValueError("File {} should be a text file".format(args.file))
        # Decode explicitly as UTF-8: the platform default encoding is
        # locale-dependent and mis-decodes UTF-8 text on some systems.
        with open(args.file, "r", encoding="utf-8") as f:
            # Keep only non-blank lines, stripped of surrounding whitespace.
            task = [line.strip() for line in f if line.strip()]
    elif args.text:
        task = [args.text]
    else:
        raise ValueError("Please specify task or task file")

    start_time = time.time()
    pprint(text_translate(task, args.target, args.source))
    print("time cost: {}".format(time.time() - start_time))
|