wiseflow/client/backend/tranlsation_volcengine.py
2024-04-07 09:37:47 +08:00

121 lines
4.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# 使用火山引擎进行翻译的接口封装
# 通过环境变量设置VOLC_KEY格式为AK|SK
# AK-SK 需要手机号注册并实名认证具体见这里https://console.volcengine.com/iam/keymanage/ (自助接入)
# 费用每月免费额度200万字符1个汉字、1个外语字母、1个数字、1个符号或空格都计为一个字符超出后49元/每百万字符
# 图片翻译每月免费100张超出后0.04元/张
# 文本翻译并发限制每个batch最多16个总文本长度不超过5000字符max QPS为10
# 术语库管理https://console.volcengine.com/translate
import json
import time
import os
from volcengine.ApiInfo import ApiInfo
from volcengine.Credentials import Credentials
from volcengine.ServiceInfo import ServiceInfo
from volcengine.base.Service import Service
VOLC_KEY = os.environ.get('VOLC_KEY', None)
if not VOLC_KEY:
raise Exception('请设置环境变量 VOLC_KEY格式为AK|SK')
k_access_key, k_secret_key = VOLC_KEY.split('|')
def text_translate(texts: list[str], target_language: str = 'zh', source_language: str = '', logger=None) -> list[str]:
k_service_info = \
ServiceInfo('translate.volcengineapi.com',
{'Content-Type': 'application/json'},
Credentials(k_access_key, k_secret_key, 'translate', 'cn-north-1'),
5,
5)
k_query = {
'Action': 'TranslateText',
'Version': '2020-06-01'
}
k_api_info = {
'translate': ApiInfo('POST', '/', k_query, {}, {})
}
service = Service(k_service_info, k_api_info)
if source_language:
body = {
'TargetLanguage': target_language,
'TextList': texts,
'SourceLanguage': source_language
}
else:
body = {
'TargetLanguage': 'zh',
'TextList': texts,
}
if logger:
logger.debug(f'post body:\n {body}')
for i in range(3):
res = service.json('translate', {}, json.dumps(body))
result = json.loads(res)
if logger:
logger.debug(f'result:\n {result}')
if "Error" not in result["ResponseMetadata"]:
break
if result["ResponseMetadata"]["Error"]["Code"] in ['-400', '-415', '1000XX']:
if logger:
logger.warning(f"translation failed cause: {result['ResponseMetadata']['Error']['Message']}")
else:
print(f"translation failed cause: {result['ResponseMetadata']['Error']['Message']}")
return []
if logger:
logger.warning(f"translation failed cause: {result['ResponseMetadata']['Error']['Message']}\n retry...")
else:
print(f"translation failed cause: {result['ResponseMetadata']['Error']['Message']}\n retry...")
time.sleep(1)
if "Error" in result["ResponseMetadata"]:
if logger:
logger.warning("translation service out of use, have retried 3 times...")
else:
print("translation service out of use, have retried 3 times...")
return []
return [_["Translation"] for _ in result["TranslationList"]]
if __name__ == '__main__':
import argparse
from pprint import pprint
parser = argparse.ArgumentParser(description='argparse')
parser.add_argument("--file", "-F", type=str, default=None)
parser.add_argument('--text', "-T", type=str, default="",
help="text to translate")
parser.add_argument('--source', type=str, default="",
help="source language")
parser.add_argument('--target', type=str, default='zh',
help="target language, default zh")
args = parser.parse_args()
if args.file:
if not os.path.exists(args.file):
raise FileNotFoundError("File {} not found".format(args.file))
if not args.file.endswith(".txt"):
raise ValueError("File {} should be a text file".format(args.file))
with open(args.file, "r") as f:
task = f.readlines()
task = [_.strip() for _ in task if _.strip()]
elif args.text:
task = [args.text]
else:
raise ValueError("Please specify task or task file")
start_time = time.time()
pprint(text_translate(task, args.target, args.source))
print("time cost: {}".format(time.time() - start_time))