Mirror of https://github.com/yihong0618/Kindle_download_helper.git (synced 2025-11-22 07:59:04 +08:00)
feat: no need kindle
.gitignore (vendored, 4 changes)
@@ -133,4 +133,6 @@ dmypy.json
# remove MacOS .DS_Store
.DS_Store
EPUB/
.device_id
.tokens
cli.py (new file, 329 lines)
@@ -0,0 +1,329 @@
import argparse
import json
import logging
import os

import urllib3

from kindle_download_helper.config import (
    DEFAULT_OUT_DEDRM_DIR,
    DEFAULT_OUT_DIR,
    DEFAULT_OUT_EPUB_DIR,
    DEFAULT_SESSION_FILE,
)
from kindle_download_helper.kindle import Kindle

logger = logging.getLogger("kindle")
fh = logging.FileHandler(".error_books.log")
fh.setLevel(logging.ERROR)
logger.addHandler(fh)

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)


# download selected books for cli
def download_selected_books(kindle, options):
    # get all books and the default device
    print("Getting all books, please wait...")
    books = kindle.get_all_books(filetype=options.filetype)
    device = kindle.find_device()

    # print all books
    for idx, book in enumerate(books):
        print(
            "Index: "
            + "{:>5d}".format(idx + 1)
            + " | Title: "
            + book["title"]
            + " | asin: "
            + book["asin"]
        )

    # download loop
    while True:
        # get the indices of the books to download
        indices = input(
            "Input the index of books you want to download, split by space (q to quit, l to list books).\n"
        ).split()

        # "q" quits, "l" lists all books again
        if indices[0] == "q":
            break
        elif indices[0] == "l":
            for idx, book in enumerate(books):
                print(
                    "Index: "
                    + "{:>5d}".format(idx + 1)
                    + " | Title: "
                    + book["title"]
                    + " | asin: "
                    + book["asin"]
                )
            continue

        # decode the indices
        downlist = []
        flag = True
        for idx in indices:
            if not idx.isnumeric():
                if ":" in idx:
                    # not a number but contains ":", so it is a range
                    idx_begin, idx_end = [int(i) for i in idx.split(":")]
                    # append the range to downlist
                    extend_list = [i for i in range(idx_begin - 1, idx_end)]
                    downlist.extend(extend_list)
                else:
                    # not a number and no ":" in it, so it is an error
                    print("Input error, please input numbers!!!")
                    flag = False
                    break
            else:
                # a plain number, append it to downlist
                downlist.append(int(idx) - 1)
        if not flag:
            continue

        # remove the duplicate indices
        downlist = list(set(downlist))

        # check if the indices are valid
        if max(downlist) >= len(books) or min(downlist) < 0:
            print(
                "Input error, please input numbers between 1 and "
                + str(len(books))
                + "!!!"
            )
            continue

        # print the books to download
        for idx in downlist:
            print(
                "Index: "
                + "{:>5d}".format(idx + 1)
                + " | Title: "
                + books[idx]["title"]
                + " | asin: "
                + books[idx]["asin"]
            )
        print("Downloading " + str(len(downlist)) + " books:")

        # ask whether to continue
        while True:
            flag = input("Continue? (y/n)")
            if flag == "y" or flag == "n":
                break
            else:
                print("Input error, please input y or n")
        if flag == "n":
            continue

        # download the books
        for i, idx in enumerate(downlist):
            print(
                "Downloading "
                + str(i + 1)
                + "/"
                + str(len(downlist))
                + " "
                + books[idx]["title"]
                + " ..."
            )
            kindle.download_one_book(books[idx], device, idx, filetype=options.filetype)
        print("Download finished.")


def main():
    logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))

    logger.addHandler(logging.StreamHandler())
    parser = argparse.ArgumentParser()
    parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")

    cookie_group = parser.add_mutually_exclusive_group()
    cookie_group.add_argument(
        "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
    )
    cookie_group.add_argument(
        "--cookie-file", dest="cookie_file", default="", help="load cookie local file"
    )

    parser.add_argument(
        "--cn",
        dest="domain",
        action="store_const",
        const="cn",
        default="com",
        help="if your account is an amazon.cn account",
    )
    parser.add_argument(
        "--jp",
        dest="domain",
        action="store_const",
        const="jp",
        default="com",
        help="if your account is an amazon.co.jp account",
    )
    parser.add_argument(
        "--de",
        dest="domain",
        action="store_const",
        const="de",
        default="com",
        help="if your account is an amazon.de account",
    )
    parser.add_argument(
        "--uk",
        dest="domain",
        action="store_const",
        const="uk",
        default="com",
        help="if your account is an amazon.co.uk account",
    )
    parser.add_argument(
        "--resume-from",
        dest="index",
        type=int,
        default=1,
        help="resume from the index if download failed",
    )
    parser.add_argument(
        "--cut-length",
        dest="cut_length",
        type=int,
        default=100,
        help="truncate the file name",
    )
    parser.add_argument(
        "-o", "--outdir", default=DEFAULT_OUT_DIR, help="download output dir"
    )
    parser.add_argument(
        "-od",
        "--outdedrmdir",
        default=DEFAULT_OUT_DEDRM_DIR,
        help="download output dedrm dir",
    )
    parser.add_argument(
        "-oe",
        "--outepubmdir",
        default=DEFAULT_OUT_EPUB_DIR,
        help="download output epub dir",
    )
    parser.add_argument(
        "-s",
        "--session-file",
        default=DEFAULT_SESSION_FILE,
        help="The reusable session dump file",
    )
    parser.add_argument(
        "--pdoc",
        dest="filetype",
        action="store_const",
        const="PDOC",
        default="EBOK",
        help="to download personal documents or ebook",
    )
    parser.add_argument(
        "--resolve_duplicate_names",
        dest="resolve_duplicate_names",
        action="store_true",
        help="Resolve duplicate names files to download",
    )
    parser.add_argument(
        "--readme",
        dest="readme",
        action="store_true",
        help="If you want to generate kindle readme stats",
    )
    parser.add_argument(
        "--dedrm",
        dest="dedrm",
        action="store_true",
        help="If you want to `dedrm` directly",
    )

    parser.add_argument(
        "--list",
        dest="list_only",
        action="store_true",
        help="just list books/pdoc, not to download",
    )

    parser.add_argument(
        "--device_sn",
        dest="device_sn",
        default="",
        help="Download file for device with this serial number",
    )

    parser.add_argument(
        "--mode",
        dest="mode",
        default="all",
        help="Mode of download, all: download all files at once, sel: download selected files",
    )

    options = parser.parse_args()

    if not os.path.exists(options.outdir):
        os.makedirs(options.outdir)
    # for dedrm
    if not os.path.exists(options.outdedrmdir):
        os.makedirs(options.outdedrmdir)
    # for epub
    if not os.path.exists(options.outepubmdir):
        os.makedirs(options.outepubmdir)

    kindle = Kindle(
        options.csrf_token,
        options.domain,
        options.outdir,
        options.outdedrmdir,
        options.outepubmdir,
        options.cut_length,
        session_file=options.session_file,
        device_sn=options.device_sn,
    )
    # other args
    kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
    kindle.dedrm = options.dedrm

    if options.cookie_file:
        with open(options.cookie_file, "r") as f:
            kindle.set_cookie_from_string(f.read())
    elif options.cookie:
        kindle.set_cookie_from_string(options.cookie)
    else:
        kindle.is_browser_cookie = True

    if options.list_only:
        kindle.get_devices()
        print(
            json.dumps(
                kindle.get_all_books(filetype=options.filetype),
                indent=4,
                ensure_ascii=False,
            )
        )
        exit()

    if options.readme:
        # generate readme stats
        kindle.make_kindle_stats_readme()
    else:
        # check the download mode
        if options.mode == "all":
            # download all books
            kindle.download_books(
                start_index=options.index - 1, filetype=options.filetype
            )
        elif options.mode == "sel":
            # download selected books
            download_selected_books(kindle, options)
        else:
            print("mode error, please input all or sel")


if __name__ == "__main__":
    main()
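For reference, "sel" mode above accepts space-separated 1-based indices plus colon ranges such as 5:8. A minimal standalone sketch of that parsing rule (a hypothetical helper, not part of this commit):

def parse_selection(tokens):
    """Turn e.g. ["2", "5:8"] into the 0-based indices [1, 4, 5, 6, 7]."""
    picked = set()
    for tok in tokens:
        if tok.isnumeric():
            picked.add(int(tok) - 1)  # single index, shifted to 0-based
        elif ":" in tok:
            begin, end = (int(i) for i in tok.split(":"))
            picked.update(range(begin - 1, end))  # "begin:end" is inclusive
        else:
            raise ValueError("not a number or a range: " + tok)
    return sorted(picked)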
kindle_download_helper/__init__.py
@@ -1,2 +1,3 @@
from kindle_download_helper import kindle
from kindle_download_helper.cli import main
from kindle_download_helper.no_cli import no_main
kindle_download_helper/amazon_api.py (new file, 319 lines)
@@ -0,0 +1,319 @@
import base64
import datetime
import gzip
import hashlib
import hmac
import json
import os
import secrets
import sys
import time
import uuid
from urllib.parse import urlparse

import requests
import xmltodict
from Crypto.Cipher import AES
from Crypto.Hash import SHA256
from Crypto.Protocol.KDF import PBKDF2
from Crypto.PublicKey import RSA
from Crypto.Signature import pkcs1_15

SCRIPT_PATH = os.path.dirname(os.path.realpath(sys.argv[0]))
DEVICE_ID_PATH = os.path.join(SCRIPT_PATH, ".device_id")
TOKENS_PATH = os.path.join(SCRIPT_PATH, ".tokens")

if os.path.isfile(DEVICE_ID_PATH):
    with open(DEVICE_ID_PATH, "r") as f:
        DEVICE_ID = f.read()
else:
    with open(DEVICE_ID_PATH, "w") as f:
        DEVICE_ID = secrets.token_hex(16)
        f.write(DEVICE_ID)

PID = hashlib.sha256(DEVICE_ID.encode()).hexdigest()[23:31].upper()


def save_tokens(tokens):
    with open(TOKENS_PATH, "w") as f:
        f.write(json.dumps(tokens))


def get_tokens():
    if os.path.isfile(TOKENS_PATH):
        with open(TOKENS_PATH, "r") as f:
            return json.loads(f.read())
    else:
        return None


APP_NAME = "com.iconology.comix"
APP_VERSION = "1221328936"
DEVICE_NAME = "walleye/google/Pixel 2"
DEVICE_TYPE = "A2A33MVZVPQKHY"
MANUFACTURER = "Google"
OS_VERSION = "google/walleye/walleye:8.1.0/OPM1.171019.021/4565141:user/release-keys"
PFM = "A1F83G8C2ARO7P"
SW_VERSION = "1221328936"


def get_auth_headers(domain):
    return {
        "Accept-Charset": "utf-8",
        "User-Agent": "Dalvik/2.1.0 (Linux; U; Android 10; Pixel 2 Build/OPM1.171019.021)",
        "x-amzn-identity-auth-domain": f"api.amazon.{domain}",
        "x-amzn-requestid": str(uuid.uuid4()).replace("-", ""),
    }


def get_api_headers():
    return {
        "accept": "*/*",
        "accept-encoding": "gzip",
        "accept-language": "en-US",
        "currenttransportmethod": "WiFi",
        "is_archived_items": "1",
        "software_rev": SW_VERSION,
        "user-agent": "okhttp/3.12.1",
        "x-adp-app-id": APP_NAME,
        "x-adp-app-sw": SW_VERSION,
        "x-adp-attemptcount": "1",
        "x-adp-cor": "US",
        "x-adp-country": "US",
        "x-adp-lto": "0",
        "x-adp-pfm": PFM,
        "x-adp-reason": "ArchivedItems",
        "x-adp-sw": SW_VERSION,
        "x-adp-transport": "WiFi",
        "x-amzn-accept-type": "application/x.amzn.digital.deliverymanifest@1.0",
    }


def generate_frc(device_id):
    cookies = json.dumps(
        {
            "ApplicationName": APP_NAME,
            "ApplicationVersion": APP_VERSION,
            "DeviceLanguage": "en",
            "DeviceName": DEVICE_NAME,
            "DeviceOSVersion": OS_VERSION,
            "IpAddress": requests.get("https://api.ipify.org").text,
            "ScreenHeightPixels": "1920",
            "ScreenWidthPixels": "1280",
            "TimeZone": "00:00",
        }
    )

    def pkcs7_pad(data):
        padsize = 16 - len(data) % 16
        return data + bytes([padsize]) * padsize

    compressed = gzip.compress(cookies.encode())

    key = PBKDF2(device_id, b"AES/CBC/PKCS7Padding")
    iv = secrets.token_bytes(16)
    cipher = AES.new(key, AES.MODE_CBC, iv)
    ciphertext = cipher.encrypt(pkcs7_pad(compressed))

    hmac_ = hmac.new(
        PBKDF2(device_id, b"HmacSHA256"), iv + ciphertext, hashlib.sha256
    ).digest()

    return base64.b64encode(b"\0" + hmac_[:8] + iv + ciphertext).decode()

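# Illustrative note (not part of this commit): the FRC token returned above is
# base64 of 0x00 || hmac[:8] || iv || ciphertext, where the ciphertext is
# AES-128-CBC over the gzipped cookie JSON and both keys come from PBKDF2 with
# fixed salts. A matching inverse, for inspection only:
#
#   def decode_frc(frc, device_id):
#       raw = base64.b64decode(frc)
#       iv, ciphertext = raw[9:25], raw[25:]
#       key = PBKDF2(device_id, b"AES/CBC/PKCS7Padding")  # 16-byte key
#       plain = AES.new(key, AES.MODE_CBC, iv).decrypt(ciphertext)
#       plain = plain[: -plain[-1]]  # strip PKCS#7 padding
#       return json.loads(gzip.decompress(plain))
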
def login(email, password, domain="com", device_id=DEVICE_ID):
    tokens = get_tokens()
    if tokens and tokens["name"] == hashlib.md5(email.encode()).hexdigest():
        return refresh(tokens)

    body = {
        "auth_data": {
            "use_global_authentication": "true",
            "user_id_password": {"password": password, "user_id": email},
        },
        "registration_data": {
            "domain": "DeviceLegacy",
            "device_type": DEVICE_TYPE,
            "device_serial": device_id,
            "app_name": APP_NAME,
            "app_version": APP_VERSION,
            "device_model": DEVICE_NAME,
            "os_version": OS_VERSION,
            "software_version": SW_VERSION,
        },
        "requested_token_type": [
            "bearer",
            "mac_dms",
            "store_authentication_cookie",
            "website_cookies",
        ],
        "cookies": {"domain": f"amazon.{domain}", "website_cookies": []},
        "user_context_map": {"frc": generate_frc(device_id)},
        "device_metadata": {
            "device_os_family": "android",
            "device_type": DEVICE_TYPE,
            "device_serial": device_id,
            "manufacturer": MANUFACTURER,
            "model": DEVICE_NAME,
            "os_version": "30",
            "android_id": "e97690019ccaab2b",
            "product": DEVICE_NAME,
        },
        "requested_extensions": ["device_info", "customer_info"],
    }

    response_json = requests.post(
        f"https://api.amazon.{domain}/auth/register",
        headers=get_auth_headers(domain),
        json=body,
    ).json()

    try:
        tokens = {
            "name": hashlib.md5(
                email.encode()
            ).hexdigest(),  # to differentiate tokens from different accounts
            "domain": domain,
            "device_id": device_id,
            "access_token": response_json["response"]["success"]["tokens"]["bearer"][
                "access_token"
            ],
            "refresh_token": response_json["response"]["success"]["tokens"]["bearer"][
                "refresh_token"
            ],
            "device_private_key": response_json["response"]["success"]["tokens"][
                "mac_dms"
            ]["device_private_key"],
            "adp_token": response_json["response"]["success"]["tokens"]["mac_dms"][
                "adp_token"
            ],
        }
        return register_device(tokens)
    except Exception:
        print(json.dumps(response_json))
        return None


def refresh(tokens):
    body = {
        "app_name": APP_NAME,
        "app_version": APP_VERSION,
        "source_token_type": "refresh_token",
        "source_token": tokens["refresh_token"],
        "requested_token_type": "access_token",
    }

    response_json = requests.post(
        "https://api.amazon.com/auth/token",
        headers=get_auth_headers(tokens["domain"]),
        json=body,
    ).json()
    try:
        tokens["access_token"] = response_json["access_token"]
    except Exception:
        print(json.dumps(response_json))
    return tokens


def signed_request(
    method,
    url,
    headers=None,
    body=None,
    asin=None,
    tokens=None,
    request_id=None,
    request_type=None,
):
    """
    modified from https://github.com/mkb79/Audible/blob/master/src/audible/auth.py
    """

    if not tokens:
        tokens = get_tokens()
    if not tokens:
        print("Could not retrieve auth tokens")
        return None
    elif "adp_token" not in tokens:
        print("Could not find the adp token in tokens")
        return None
    elif "device_private_key" not in tokens:
        print("Could not find the private key in tokens")
        return None

    if not request_id:
        request_id = str(uuid.uuid4()).replace("-", "")
    else:
        request_id += str(int(time.time())) + "420"

    if not body:
        body = ""

    date = datetime.datetime.utcnow().isoformat("T")[:-7] + "Z"
    u = urlparse(url)
    path = f"{u.path}"
    if u.query != "":
        path += f"{u.params}?{u.query}"
    data = f"{method}\n{path}\n{date}\n{body}\n{tokens['adp_token']}"

    key = RSA.import_key(base64.b64decode(tokens["device_private_key"]))
    signed_encoded = base64.b64encode(pkcs1_15.new(key).sign(SHA256.new(data.encode())))
    signature = f"{signed_encoded.decode()}:{date}"

    if not headers:
        headers = get_api_headers()
    if asin:
        headers["x-adp-correlationid"] = f"{asin}-{int(time.time())}420.kindle.ebook"
    if request_type == "DRM_VOUCHER":
        headers["accept"] = "application/x-com.amazon.drm.Voucher@1.0"

    headers.update(
        {
            "x-adp-token": tokens["adp_token"],
            "x-adp-alg": "SHA256WithRSA:1.0",
            "x-adp-signature": signature,
            "x-amzn-requestid": request_id,
        }
    )

    return requests.Request(method, url, headers, data=body).prepare()


def register_device(tokens=None):
    if not tokens:
        tokens = get_tokens()

    url = "https://firs-ta-g7g.amazon.com/FirsProxy/registerAssociatedDevice"
    headers = {
        "Content-Type": "text/xml",
        "Expect": "",
    }
    body = f"<?xml version=\"1.0\" encoding=\"UTF-8\"?><request><parameters><deviceType>{DEVICE_TYPE}</deviceType><deviceSerialNumber>{tokens['device_id']}</deviceSerialNumber><pid>{PID}</pid><deregisterExisting>false</deregisterExisting><softwareVersion>{SW_VERSION}</softwareVersion><softwareComponentId>{APP_NAME}</softwareComponentId><authToken>{tokens['access_token']}</authToken><authTokenType>ACCESS_TOKEN</authTokenType></parameters></request>"

    resp = requests.Session().send(
        signed_request("POST", url, headers, body, tokens=tokens)
    )

    if resp.status_code == 200:
        parsed_response = xmltodict.parse(resp.text)
        tokens["device_private_key"] = parsed_response["response"]["device_private_key"]
        tokens["adp_token"] = parsed_response["response"]["adp_token"]

    save_tokens(tokens)
    return tokens


if __name__ == "__main__":
    arg_count = len(sys.argv)
    if arg_count != 4:
        print("usage: amazon_api.py <email> <password> <domain>")
        print("domains: com, co.uk, co.jp, de")
        exit()

    tokens = login(sys.argv[1], sys.argv[2], sys.argv[3])

    if tokens is None:
        print("Could not login!")
    else:
        print(json.dumps(tokens))
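Taken together: login() registers a virtual Android device (falling back to refresh() when the cached .tokens already matches the account), register_device() upgrades the tokens with a device RSA key, and signed_request() uses that key to sign each API call. A minimal end-to-end sketch, assuming valid credentials (the email/password are placeholders; the sync URL is the one no_kindle.py below actually calls):

import requests

from kindle_download_helper import amazon_api

tokens = amazon_api.login("user@example.com", "secret", domain="com")
prepared = amazon_api.signed_request(
    "GET",
    "https://todo-ta-g7g.amazon.com/FionaTodoListProxy/syncMetaData",
    tokens=tokens,
)
resp = requests.Session().send(prepared)  # the signed library listing
print(resp.status_code)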
kindle_download_helper/config.py
@@ -68,3 +68,12 @@ MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n- I pushed {pdocs_len} doc

KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"

API_MANIFEST_URL = (
    "https://kindle-digital-delivery.amazon.com/delivery/manifest/kindle.ebook/"
)

API_HEADERS = {
    "User-Agent": "Comics/3.10.17[3.10.17.310418] Google/10",
    "x-client-application": "com.comixology.comics",
}
kindle_download_helper/dedrm/__init__.py
@@ -1,2 +1,3 @@
from .kfxdedrm import KFXZipBook
from .kgenpids import get_pid_list
from .mobidedrm import MobiBook
kindle_download_helper/dedrm/aescipher.py (new file, 383 lines)
@@ -0,0 +1,383 @@
import base64
import hmac
import json
import logging
import os
import pathlib
import struct
from hashlib import sha256
from typing import Dict, Optional, Tuple, Union

from pbkdf2 import PBKDF2
from pyaes import AESModeOfOperationCBC, Decrypter, Encrypter

logger = logging.getLogger("kindle.aescipher")

BLOCK_SIZE: int = 16  # the AES block size


def aes_cbc_encrypt(
    key: bytes, iv: bytes, data: str, padding: str = "default"
) -> bytes:
    """Encrypts data in cipher block chaining mode of operation.

    Args:
        key: The AES key.
        iv: The initialization vector.
        data: The data to encrypt.
        padding: Can be ``default`` or ``none`` (Default: default)

    Returns:
        The encrypted data.
    """
    encrypter = Encrypter(AESModeOfOperationCBC(key, iv), padding=padding)
    encrypted = encrypter.feed(data) + encrypter.feed()
    return encrypted


def aes_cbc_decrypt(
    key: bytes, iv: bytes, encrypted_data: bytes, padding: str = "default"
) -> bytes:
    """Decrypts data encrypted in cipher block chaining mode of operation.

    Args:
        key: The AES key used at encryption.
        iv: The initialization vector used at encryption.
        encrypted_data: The encrypted data to decrypt.
        padding: Can be ``default`` or ``none`` (Default: default)

    Returns:
        The decrypted data.
    """
    decrypter = Decrypter(AESModeOfOperationCBC(key, iv), padding=padding)
    decrypted = decrypter.feed(encrypted_data) + decrypter.feed()
    return decrypted


def create_salt(salt_marker: bytes, kdf_iterations: int) -> Tuple[bytes, bytes]:
    """Creates the header and salt for the :func:`derive_from_pbkdf2` function.

    The header consists of the number of KDF iterations encoded as a
    big-endian word wrapped by ``salt_marker`` on both sides.
    The random salt has a length of 16 bytes (the AES block size) minus the
    length of the salt header.
    """
    header = salt_marker + struct.pack(">H", kdf_iterations) + salt_marker
    salt = os.urandom(BLOCK_SIZE - len(header))
    return header, salt


def pack_salt(header: bytes, salt: bytes) -> bytes:
    """Combines the header and salt created by the :func:`create_salt` function."""
    return header + salt


def unpack_salt(packed_salt: bytes, salt_marker: bytes) -> Tuple[bytes, int]:
    """Unpacks the salt and kdf_iterations from a previously packed salt."""
    mlen = len(salt_marker)
    hlen = mlen * 2 + 2

    if not (
        packed_salt[:mlen] == salt_marker
        and packed_salt[mlen + 2 : hlen] == salt_marker
    ):
        raise ValueError("Check salt_marker.")

    kdf_iterations = struct.unpack(">H", packed_salt[mlen : mlen + 2])[0]
    salt = packed_salt[hlen:]
    return salt, kdf_iterations
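
# Illustrative round trip of the salt format above (not part of the original
# file): with the default marker b"$" the packed salt is b"$" + 2-byte
# big-endian iteration count + b"$" + 12 random bytes, 16 bytes in total.
#
#   header, salt = create_salt(b"$", 1000)
#   packed = pack_salt(header, salt)
#   assert unpack_salt(packed, b"$") == (salt, 1000)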

def derive_from_pbkdf2(
    password: str, *, key_size: int, salt: bytes, kdf_iterations: int, hashmod, mac
) -> bytes:
    """Creates an AES key with the :class:`PBKDF2` key derivation class."""
    kdf = PBKDF2(password, salt, min(kdf_iterations, 65535), hashmod, mac)
    return kdf.read(key_size)


class AESCipher:
    """Encrypt/Decrypt data using a password to generate the key.

    The encryption algorithm used is symmetric AES in cipher-block chaining
    (CBC) mode.

    The key is derived via the PBKDF2 key derivation function (KDF) from the
    password and a random salt of 16 bytes (the AES block size) minus the
    length of the salt header (see below).
    The hash function used by PBKDF2 is SHA256 by default. You can pass a
    different hash function module via the ``hashmod`` argument. The module
    must adhere to the Python API for Cryptographic Hash Functions (PEP 247).
    PBKDF2 uses a number of iterations of the hash function to derive the key,
    which can be set via the ``kdf_iterations`` keyword argument. The default
    number is 1000 and the maximum 65535.
    The header and the salt are written to the first block of the encrypted
    output (bytes mode) or written as key/value pairs (dict mode). The header
    consists of the number of KDF iterations encoded as a big-endian word
    wrapped by ``salt_marker`` on both sides. With the default value of
    ``salt_marker = b'$'``, the header size is thus 4 and the salt 12 bytes.
    The salt marker must be a byte string of 1-6 bytes length.
    The last block of the encrypted output is padded with up to 16 bytes, all
    having the value of the length of the padding.
    All values in dict mode are written as base64 encoded strings.

    Attributes:
        password: The password for encryption/decryption.
        key_size: The size of the key. Can be ``16``, ``24`` or ``32``
            (Default: 32).
        salt_marker: The salt marker with max. length of 6 bytes (Default: $).
        kdf_iterations: The number of iterations of the hash function to
            derive the key (Default: 1000).
        hashmod: The hash method to use (Default: sha256).
        mac: The mac module to use (Default: hmac).

    Args:
        password: The password for encryption/decryption.
        key_size: The size of the key. Can be ``16``, ``24`` or ``32``
            (Default: 32).
        salt_marker: The salt marker with max. length of 6 bytes (Default: $).
        kdf_iterations: The number of iterations of the hash function to
            derive the key (Default: 1000).
        hashmod: The hash method to use (Default: sha256).
        mac: The mac module to use (Default: hmac).

    Raises:
        ValueError: If `salt_marker` is not one to six bytes long.
        ValueError: If `kdf_iterations` is greater than 65535.
        TypeError: If type of `salt_marker` is not bytes.
    """

    def __init__(
        self,
        password: str,
        *,
        key_size: int = 32,
        salt_marker: bytes = b"$",
        kdf_iterations: int = 1000,
        hashmod=sha256,
        mac=hmac,
    ) -> None:
        if not 1 <= len(salt_marker) <= 6:
            raise ValueError("The salt_marker must be one to six bytes long.")

        if not isinstance(salt_marker, bytes):
            raise TypeError("salt_marker must be a bytes instance.")

        if kdf_iterations >= 65536:
            raise ValueError("kdf_iterations must be <= 65535.")

        self.password = password
        self.key_size = key_size
        self.hashmod = hashmod
        self.mac = mac
        self.salt_marker = salt_marker
        self.kdf_iterations = kdf_iterations

    def _encrypt(self, data: str) -> Tuple[bytes, bytes, bytes]:
        header, salt = create_salt(self.salt_marker, self.kdf_iterations)
        key = derive_from_pbkdf2(
            password=self.password,
            key_size=self.key_size,
            salt=salt,
            kdf_iterations=self.kdf_iterations,
            hashmod=self.hashmod,
            mac=self.mac,
        )
        iv = os.urandom(BLOCK_SIZE)
        encrypted_data = aes_cbc_encrypt(key, iv, data)
        return pack_salt(header, salt), iv, encrypted_data

    def _decrypt(self, salt: bytes, iv: bytes, encrypted_data: bytes) -> str:
        try:
            salt, kdf_iterations = unpack_salt(salt, self.salt_marker)
        except ValueError:
            kdf_iterations = self.kdf_iterations

        key = derive_from_pbkdf2(
            password=self.password,
            key_size=self.key_size,
            salt=salt,
            kdf_iterations=kdf_iterations,
            hashmod=self.hashmod,
            mac=self.mac,
        )
        return aes_cbc_decrypt(key, iv, encrypted_data).decode("utf-8")

    def to_dict(self, data: str) -> Dict[str, str]:
        """Encrypts data in dict style.

        The output dict contains the base64 encoded (packed) salt, iv and
        ciphertext key/value pairs and an info key/value pair with additional
        encryption information.

        Args:
            data: The data to encrypt.

        Returns:
            The encrypted data in dict style.
        """
        salt, iv, encrypted_data = self._encrypt(data)

        return {
            "salt": base64.b64encode(salt).decode("utf-8"),
            "iv": base64.b64encode(iv).decode("utf-8"),
            "ciphertext": base64.b64encode(encrypted_data).decode("utf-8"),
            "info": "base64-encoded AES-CBC-256 of JSON object",
        }

    def from_dict(self, data: dict) -> str:
        """Decrypts data previously encrypted with :meth:`AESCipher.to_dict`.

        Args:
            data: The encrypted data in json style.

        Returns:
            The decrypted data.
        """
        salt = base64.b64decode(data["salt"])
        iv = base64.b64decode(data["iv"])
        encrypted_data = base64.b64decode(data["ciphertext"])
        return self._decrypt(salt, iv, encrypted_data)

    def to_bytes(self, data: str) -> bytes:
        """Encrypts data in bytes style.

        The output bytes contain the (packed) salt, iv and ciphertext.

        Args:
            data: The data to encrypt.

        Returns:
            The encrypted data in bytes style.
        """
        salt, iv, encrypted_data = self._encrypt(data)
        return salt + iv + encrypted_data

    def from_bytes(self, data: bytes) -> str:
        """Decrypts data previously encrypted with :meth:`AESCipher.to_bytes`.

        Args:
            data: The encrypted data in bytes style.

        Returns:
            The decrypted data.
        """
        bs = BLOCK_SIZE
        salt = data[:bs]
        iv = data[bs : 2 * bs]
        encrypted_data = data[2 * bs :]
        return self._decrypt(salt, iv, encrypted_data)

    def to_file(
        self,
        data: str,
        filename: pathlib.Path,
        encryption: str = "json",
        indent: int = 4,
    ) -> None:
        """Encrypts and saves data to the given file.

        Args:
            data: The data to encrypt.
            filename: The name of the file to save the data to.
            encryption: The encryption style to use. Can be ``json`` or
                ``bytes`` (Default: json).
            indent: The indentation level when saving in json style
                (Default: 4).

        Raises:
            ValueError: If `encryption` is not ``json`` or ``bytes``.
        """
        if encryption == "json":
            encrypted_dict = self.to_dict(data)
            data_json = json.dumps(encrypted_dict, indent=indent)
            filename.write_text(data_json)

        elif encryption == "bytes":
            encrypted_data = self.to_bytes(data)
            filename.write_bytes(encrypted_data)

        else:
            raise ValueError('encryption must be "json" or "bytes".')

    def from_file(self, filename: pathlib.Path, encryption: str = "json") -> str:
        """Loads and decrypts data from the given file.

        Args:
            filename: The name of the file to load the data from.
            encryption: The encryption style which was used. Can be ``json``
                or ``bytes`` (Default: json).

        Returns:
            The decrypted data.

        Raises:
            ValueError: If `encryption` is not ``json`` or ``bytes``.
        """
        if encryption == "json":
            encrypted_json = filename.read_text()
            encrypted_dict = json.loads(encrypted_json)
            return self.from_dict(encrypted_dict)

        elif encryption == "bytes":
            encrypted_data = filename.read_bytes()
            return self.from_bytes(encrypted_data)

        else:
            raise ValueError('encryption must be "json" or "bytes".')
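
# Illustrative usage of AESCipher (not part of the original file): encrypt a
# JSON string in dict mode and read it back, exercising the salt/iv/ciphertext
# layout documented above.
#
#   cipher = AESCipher("my password", kdf_iterations=1000)
#   blob = cipher.to_dict('{"hello": "kindle"}')  # salt, iv, ciphertext, info
#   assert cipher.from_dict(blob) == '{"hello": "kindle"}'
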
def detect_file_encryption(filename: pathlib.Path) -> Optional[str]:
    """Detect the encryption format from an authentication file.

    Args:
        filename: The name of the authentication file.

    Returns:
        ``False`` if the file is not encrypted, otherwise the encryption
        format (``json`` or ``bytes``).
    """
    file = filename.read_bytes()
    encryption = None

    try:
        file = json.loads(file)
        if "adp_token" in file:
            encryption = False
        elif "ciphertext" in file:
            encryption = "json"
    except UnicodeDecodeError:
        encryption = "bytes"

    return encryption


def remove_file_encryption(
    source: Union[str, pathlib.Path],
    target: Union[str, pathlib.Path],
    password: str,
    **kwargs,
) -> None:
    """Removes the encryption from an authentication file.

    Please try to load the authentication file with
    :meth:`audible.Authenticator.from_file` and save the authentication data
    as an unencrypted file first. Use this function as a fallback if you run
    into any error.

    Args:
        source: The encrypted authentication file.
        target: The filename for the decrypted file.
        password: The password for the encrypted authentication file.

    Raises:
        ValueError: If ``source`` is not encrypted.
    """
    source_file = pathlib.Path(source)
    encryption = detect_file_encryption(source_file)

    if not encryption:
        raise ValueError("file is not encrypted")

    crypter = AESCipher(password, **kwargs)
    decrypted = crypter.from_file(source_file, encryption=encryption)
    pathlib.Path(target).write_text(decrypted)
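A short usage note for the two helpers above (the file names are hypothetical): detect the container format first, then strip the encryption in one call.

from pathlib import Path

fmt = detect_file_encryption(Path("auth_encrypted.json"))  # "json", "bytes", or falsy
if fmt:
    remove_file_encryption("auth_encrypted.json", "auth_plain.json", "my password")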
@@ -8,16 +8,16 @@ __license__ = "GPL v3"
__version__ = "6.0"


import getopt
import html.entities
import os
import re
import sys
import time
import traceback

import kgenpids
import mobidedrm


class DrmException(Exception):
@@ -57,7 +57,7 @@ def unicode_argv():
    # Windows, with the underlying Windows API instead replacing multi-byte
    # characters with '?'.

    from ctypes import POINTER, byref, c_int, cdll, windll
    from ctypes.wintypes import LPCWSTR, LPWSTR

    GetCommandLineW = cdll.kernel32.GetCommandLineW
kindle_download_helper/dedrm/kfxdedrm.py (new file, 444 lines)
@@ -0,0 +1,444 @@
"""
This code is copied from https://github.com/apprenticeharper/DeDRM_tools and
recoded to use amazon.ion instead of the DeDRM BinaryIonParser class. Added
support for converting a metadata file from DRMION format.
"""

import hashlib
import hmac
import os
import shutil
import zipfile
from io import BytesIO

from amazon.ion import simpleion
from amazon.ion.core import IonType
from amazon.ion.symbols import SymbolTableCatalog, shared_symbol_table

from .aescipher import aes_cbc_decrypt

pythonista_lzma = False
import lzma

SYM_NAMES = [
    "com.amazon.drm.Envelope@1.0",
    "com.amazon.drm.EnvelopeMetadata@1.0",
    "size",
    "page_size",
    "encryption_key",
    "encryption_transformation",
    "encryption_voucher",
    "signing_key",
    "signing_algorithm",
    "signing_voucher",
    "com.amazon.drm.EncryptedPage@1.0",
    "cipher_text",
    "cipher_iv",
    "com.amazon.drm.Signature@1.0",
    "data",
    "com.amazon.drm.EnvelopeIndexTable@1.0",
    "length",
    "offset",
    "algorithm",
    "encoded",
    "encryption_algorithm",
    "hashing_algorithm",
    "expires",
    "format",
    "id",
    "lock_parameters",
    "strategy",
    "com.amazon.drm.Key@1.0",
    "com.amazon.drm.KeySet@1.0",
    "com.amazon.drm.PIDv3@1.0",
    "com.amazon.drm.PlainTextPage@1.0",
    "com.amazon.drm.PlainText@1.0",
    "com.amazon.drm.PrivateKey@1.0",
    "com.amazon.drm.PublicKey@1.0",
    "com.amazon.drm.SecretKey@1.0",
    "com.amazon.drm.Voucher@1.0",
    "public_key",
    "private_key",
    "com.amazon.drm.KeyPair@1.0",
    "com.amazon.drm.ProtectedData@1.0",
    "doctype",
    "com.amazon.drm.EnvelopeIndexTableOffset@1.0",
    "enddoc",
    "license_type",
    "license",
    "watermark",
    "key",
    "value",
    "com.amazon.drm.License@1.0",
    "category",
    "metadata",
    "categorized_metadata",
    "com.amazon.drm.CategorizedMetadata@1.0",
    "com.amazon.drm.VoucherEnvelope@1.0",
    "mac",
    "voucher",
    "com.amazon.drm.ProtectedData@2.0",
    "com.amazon.drm.Envelope@2.0",
    "com.amazon.drm.EnvelopeMetadata@2.0",
    "com.amazon.drm.EncryptedPage@2.0",
    "com.amazon.drm.PlainText@2.0",
    "compression_algorithm",
    "com.amazon.drm.Compressed@1.0",
    "page_index_table",
    "com.amazon.drm.VoucherEnvelope@2.0",
    "com.amazon.drm.VoucherEnvelope@3.0",
]


# asserts must always raise exceptions for proper functioning
def _assert(test, msg="Exception"):
    if not test:
        raise Exception(msg)


def get_ion_parser(ion: bytes, single_value: bool = True, addprottable: bool = False):
    catalog = SymbolTableCatalog()
    if addprottable:
        table = shared_symbol_table("ProtectedData", 1, SYM_NAMES)
        catalog.register(table)

    return simpleion.loads(ion, catalog=catalog, single_value=single_value)


class DrmIonVoucher:
    envelope = None
    version = None
    voucher = None
    drmkey = None
    license_type = "Unknown"

    encalgorithm = ""
    enctransformation = ""
    hashalgorithm = ""

    lockparams = None

    ciphertext = b""
    cipheriv = b""
    secretkey = b""

    def __init__(self, voucherenv, dsn, secret):
        self.dsn, self.secret = dsn, secret
        self.lockparams = []
        self.envelope = get_ion_parser(voucherenv, addprottable=True)

    def decrypt_voucher(self):
        shared = (
            "PIDv3" + self.encalgorithm + self.enctransformation + self.hashalgorithm
        )

        self.lockparams.sort()
        for param in self.lockparams:
            if param == "ACCOUNT_SECRET":
                shared += param + self.secret
            elif param == "CLIENT_ID":
                shared += param + self.dsn
            else:
                _assert(False, "Unknown lock parameter: %s" % param)

        sharedsecret = shared.encode("ASCII")
        key = hmac.new(sharedsecret, b"PIDv3", digestmod=hashlib.sha256).digest()
        b = aes_cbc_decrypt(key[:32], self.cipheriv[:16], self.ciphertext)

        self.drmkey = get_ion_parser(b, addprottable=True)
        _assert(
            len(self.drmkey) > 0
            and self.drmkey.ion_type == IonType.LIST
            and self.drmkey.ion_annotations[0].text == "com.amazon.drm.KeySet@1.0",
            "Expected KeySet, got %s" % self.drmkey.ion_annotations[0].text,
        )

        for item in self.drmkey:
            if item.ion_annotations[0].text != "com.amazon.drm.SecretKey@1.0":
                continue

            _assert(
                item["algorithm"] == "AES",
                "Unknown cipher algorithm: %s" % item["algorithm"],
            )
            _assert(item["format"] == "RAW", "Unknown key format: %s" % item["format"])

            self.secretkey = item["encoded"]

    def parse(self):
        _assert(len(self.envelope) > 0, "Envelope is empty")
        _assert(
            self.envelope.ion_type == IonType.STRUCT
            and self.envelope.ion_annotations[0].text.startswith(
                "com.amazon.drm.VoucherEnvelope@"
            ),
            "Unknown type encountered in envelope, expected VoucherEnvelope",
        )
        self.version = int(self.envelope.ion_annotations[0].text.split("@")[1][:-2])
        self.voucher = get_ion_parser(self.envelope["voucher"], addprottable=True)

        strategy_annotation_name = self.envelope["strategy"].ion_annotations[0].text
        _assert(
            strategy_annotation_name == "com.amazon.drm.PIDv3@1.0",
            "Unknown strategy: %s" % strategy_annotation_name,
        )

        strategy = self.envelope["strategy"]
        self.encalgorithm = strategy["encryption_algorithm"]
        self.enctransformation = strategy["encryption_transformation"]
        self.hashalgorithm = strategy["hashing_algorithm"]
        lockparams = strategy["lock_parameters"]
        _assert(
            lockparams.ion_type == IonType.LIST,
            "Expected string list for lock_parameters",
        )
        self.lockparams.extend(lockparams)

        self.parse_voucher()

    def parse_voucher(self):
        _assert(len(self.voucher) > 0, "Voucher is empty")
        _assert(
            self.voucher.ion_type == IonType.STRUCT
            and self.voucher.ion_annotations[0].text == "com.amazon.drm.Voucher@1.0",
            "Unknown type, expected Voucher",
        )

        self.cipheriv = self.voucher["cipher_iv"]
        self.ciphertext = self.voucher["cipher_text"]

        _assert(
            self.voucher["license"].ion_annotations[0].text
            == "com.amazon.drm.License@1.0",
            "Unknown license: %s" % self.voucher["license"].ion_annotations[0].text,
        )
        self.license_type = self.voucher["license"]["license_type"]

    def get_license_type(self):
        return self.license_type


class DrmIon:
    ion = None
    voucher = None
    vouchername = ""
    key = b""
    onvoucherrequired = None

    def __init__(self, ionstream, onvoucherrequired):
        self.ion = get_ion_parser(ionstream, addprottable=True, single_value=False)
        self.onvoucherrequired = onvoucherrequired

    def parse(self, outpages):
        _assert(len(self.ion) > 0, "DRMION envelope is empty")
        _assert(
            self.ion[0].ion_type == IonType.SYMBOL
            and self.ion[0].ion_annotations[0].text == "doctype",
            "Expected doctype symbol",
        )
        _assert(
            self.ion[1].ion_type == IonType.LIST
            and self.ion[1].ion_annotations[0].text
            in ["com.amazon.drm.Envelope@1.0", "com.amazon.drm.Envelope@2.0"],
            "Unknown type encountered in DRMION envelope, expected Envelope, got %s"
            % self.ion[1].ion_annotations[0].text,
        )

        for ion_list in self.ion:
            if ion_list.ion_annotations[0].text not in [
                "com.amazon.drm.Envelope@1.0",
                "com.amazon.drm.Envelope@2.0",
            ]:
                continue

            for item in ion_list:
                if item.ion_annotations[0].text in [
                    "com.amazon.drm.EnvelopeMetadata@1.0",
                    "com.amazon.drm.EnvelopeMetadata@2.0",
                ]:
                    if item.get("encryption_voucher") is None:
                        continue

                    if self.vouchername == "":
                        self.vouchername = item["encryption_voucher"]
                        self.voucher = self.onvoucherrequired(self.vouchername)
                        self.key = self.voucher.secretkey
                        _assert(
                            self.key is not None,
                            "Unable to obtain secret key from voucher",
                        )
                    else:
                        _assert(
                            self.vouchername == item["encryption_voucher"],
                            "Unexpected: Different vouchers required for same file?",
                        )

                elif item.ion_annotations[0].text in [
                    "com.amazon.drm.EncryptedPage@1.0",
                    "com.amazon.drm.EncryptedPage@2.0",
                ]:
                    decompress = False
                    decrypt = True
                    if item["cipher_text"].ion_annotations:
                        if (
                            item["cipher_text"].ion_annotations[0].text
                            == "com.amazon.drm.Compressed@1.0"
                        ):
                            decompress = True
                    ct = item["cipher_text"]
                    civ = item["cipher_iv"]
                    if ct is not None and civ is not None:
                        self.processpage(ct, civ, outpages, decompress, decrypt)

                elif item.ion_annotations[0].text in [
                    "com.amazon.drm.PlainText@1.0",
                    "com.amazon.drm.PlainText@2.0",
                ]:
                    decompress = False
                    decrypt = False
                    if (
                        item["data"].ion_annotations[0].text
                        == "com.amazon.drm.Compressed@1.0"
                    ):
                        decompress = True
                    self.processpage(item["data"], None, outpages, decompress, decrypt)

    def processpage(self, ct, civ, outpages, decompress, decrypt):
        if decrypt:
            msg = aes_cbc_decrypt(self.key[:16], civ[:16], ct)
        else:
            msg = ct

        if not decompress:
            outpages.write(msg)
            return

        _assert(msg[0] == 0, "LZMA UseFilter not supported")

        if pythonista_lzma:
            segment = lzma.decompress(msg[1:])
            msg = b""
            outpages.write(segment)
            return 0

        decomp = lzma.LZMADecompressor(format=lzma.FORMAT_ALONE)
        while not decomp.eof:
            segment = decomp.decompress(msg[1:])
            msg = b""  # contents are internally buffered after the first call
            outpages.write(segment)


class KFXZipBook:
    def __init__(self, infile, dsn):
        self.infile = infile
        self.dsn = dsn
        self.voucher = None
        self.decrypted = {}

    def getPIDMetaInfo(self):
        return (None, None)

    def processBook(self):
        with zipfile.ZipFile(self.infile, "r") as zf:
            for filename in zf.namelist():
                with zf.open(filename) as fh:
                    data = fh.read(8)
                    if data != b"\xeaDRMION\xee":
                        continue
                    data += fh.read()
                    if self.voucher is None:
                        self.decrypt_voucher()
                    print("Decrypting KFX DRMION: {0}".format(filename))
                    outfile = BytesIO()
                    DrmIon(data[8:-8], lambda name: self.voucher).parse(outfile)
                    outfile = outfile.getvalue()
                    if len(outfile) > 0:
                        self.decrypted[filename] = outfile
                    else:
                        print(
                            "Decrypting KFX DRMION {0} results in a length of zero. Skipping file.".format(
                                filename
                            )
                        )

        if not self.decrypted:
            print("The .kfx-zip archive does not contain an encrypted DRMION file")

    def decrypt_voucher(self):
        with zipfile.ZipFile(self.infile, "r") as zf:
            for info in zf.infolist():
                with zf.open(info.filename) as fh:
                    data = fh.read(4)
                    if data != b"\xe0\x01\x00\xea":
                        continue

                    data += fh.read()
                    if b"ProtectedData" in data:
                        break  # found DRM voucher
            else:
                raise Exception(
                    "The .kfx-zip archive contains an encrypted DRMION file without a DRM voucher"
                )

        print("Decrypting KFX DRM voucher: {0}".format(info.filename))

        for pid in [""] + [self.dsn]:
            for dsn_len, secret_len in [
                (0, 0),
                (16, 0),
                (16, 40),
                (32, 40),
                (40, 0),
                (40, 40),
            ]:
                if len(pid) == dsn_len + secret_len:
                    break  # split pid into DSN and account secret
            else:
                continue

            try:
                voucher = DrmIonVoucher(data, pid[:dsn_len], pid[dsn_len:])
                voucher.parse()
                voucher.decrypt_voucher()
                break
            except Exception:
                pass
        else:
            raise Exception("Failed to decrypt KFX DRM voucher with any key")

        print("KFX DRM voucher successfully decrypted")

        license_type = voucher.get_license_type()
        if license_type != "Purchase":
            raise Exception(
                (
                    "This book is licensed as {0}. "
                    "These tools are intended for use on purchased books."
                ).format(license_type)
            )

        self.voucher = voucher

    def getBookTitle(self):
        return os.path.splitext(os.path.split(self.infile)[1])[0]

    def getBookExtension(self):
        return ".kfx-zip"

    def getBookType(self):
        return "KFX-ZIP"

    def cleanup(self):
        pass

    def getFile(self, outpath):
        if not self.decrypted:
            shutil.copyfile(self.infile, outpath)
        else:
            with zipfile.ZipFile(self.infile, "r") as zif:
                with zipfile.ZipFile(outpath, "w") as zof:
                    for info in zif.infolist():
                        zof.writestr(
                            info,
                            self.decrypted.get(info.filename, zif.read(info.filename)),
                        )
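An illustrative end-to-end use of KFXZipBook (the file names and the empty DSN are placeholders, not from this commit):

book = KFXZipBook("B00EXAMPLE.kfx-zip", dsn="")
book.processBook()                        # decrypts every DRMION entry found
book.getFile("B00EXAMPLE.dedrm.kfx-zip")  # rewrites the zip with decrypted entries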
@@ -9,9 +9,8 @@ __version__ = "3.0"
import binascii
import hashlib
import traceback
from struct import pack

global charMap1
global charMap3
@@ -23,8 +23,8 @@ __version__ = "1.0"
# For example, ActiveState Python, which exists for windows.


import binascii
import struct


class DrmException(Exception):
kindle_download_helper/no_kindle.py (new file, 400 lines)
@@ -0,0 +1,400 @@
import base64
import json
import os
import pathlib
import re
import shutil
import time
from collections import namedtuple
from datetime import datetime
from enum import Enum
from io import BytesIO
from zipfile import ZipFile

import requests
import xmltodict
from amazon.ion import simpleion
from mobi import extract
from rich import print

from kindle_download_helper import amazon_api
from kindle_download_helper.config import (
    API_MANIFEST_URL,
    DEFAULT_OUT_DEDRM_DIR,
    DEFAULT_OUT_DIR,
    DEFAULT_OUT_EPUB_DIR,
)
from kindle_download_helper.dedrm import MobiBook, get_pid_list
from kindle_download_helper.dedrm.kfxdedrm import KFXZipBook
from kindle_download_helper.third_party.ion import DrmIon, DrmIonVoucher
from kindle_download_helper.third_party.kfxlib import YJ_Book

DEBUG = False


class Scope(Enum):
    REQUIRED = 1
    PREFERRED = 2
    DEFERRED = 3

    def should_download(self, s: str):
        r = Scope[s.upper()]
        return self.value >= r.value
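
# Illustrative (not part of this commit): a scope downloads every resource at
# or below its own level, so
#   Scope.PREFERRED.should_download("required")  -> True
#   Scope.PREFERRED.should_download("deferred")  -> False
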
Request = namedtuple("Request", ["method", "url", "fn", "headers"])


def _build_correlation_id(device, serial, asin, timestamp):
    if timestamp is None:
        timestamp = datetime.utcnow().timestamp()
    timestamp = str(int(timestamp) * 1000)
    return f"Device:{device}:{serial};kindle.EBOK:{asin}:{timestamp}"

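# Illustrative (not part of this commit): with hypothetical inputs,
#   _build_correlation_id("A2A33MVZVPQKHY", "serial", "B00EXAMPLE", 1700000000.0)
#   -> "Device:A2A33MVZVPQKHY:serial;kindle.EBOK:B00EXAMPLE:1700000000000"
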
class NoKindle:
    def __init__(
        self,
        email,
        password,
        domain,
        out_dir=DEFAULT_OUT_DIR,
        out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
        out_epub_dir=DEFAULT_OUT_EPUB_DIR,
        cut_length=100,
    ):
        self.out_dir = out_dir
        self.out_dedrm_dir = out_dedrm_dir
        self.out_epub_dir = out_epub_dir
        self.session = requests.Session()
        self.ebooks = []
        self.pdocs = []
        self.library_dict = {}

        print("Authenticating . . .")
        self.tokens = amazon_api.login(email, password, domain)

    def decrypt_voucher(self, voucher_data):
        with BytesIO(voucher_data) as voucher_data_io:
            for pid in [""] + [self.tokens["device_id"]]:
                for dsn_len, secret_len in [
                    (0, 0),
                    (16, 0),
                    (16, 40),
                    (32, 40),
                    (40, 0),
                    (40, 40),
                ]:
                    if len(pid) == dsn_len + secret_len:
                        break  # split pid into DSN and account secret
                else:
                    continue
            voucher = DrmIonVoucher(voucher_data_io, pid[:dsn_len], pid[dsn_len:])
            voucher.parse()
            voucher.decryptvoucher()
            return voucher

    def decrypt_kfx(self, kfx_data):
        if kfx_data[:8] != b"\xeaDRMION\xee":
            return kfx_data

        with BytesIO() as decrypted_data:
            DrmIon(BytesIO(kfx_data[8:-8]), lambda name: self.drm_voucher).parse(
                decrypted_data
            )
            return decrypted_data.getvalue()

    def get_resource(self, resource, asin):
        resp = self.session.send(
            amazon_api.signed_request(
                "GET",
                resource["endpoint"]["url"],
                asin=asin,
                tokens=self.tokens,
                request_id=resource["id"],
                request_type=resource["type"],
            )
        )

        filename = resource["id"]
        if resource["type"] == "DRM_VOUCHER":
            filename += ".ast"
        else:
            filename += ".kfx"

        return (resp.content, filename)

    def make_library(self, last_sync=None):
        """Fetches the user library."""
        url = "https://todo-ta-g7g.amazon.com/FionaTodoListProxy/syncMetaData"
        params = {"item_count": 10000}

        if isinstance(last_sync, dict):
            try:
                last_sync = last_sync["sync_time"]
            except KeyError as exc:
                raise ValueError("`last_sync` doesn't contain `sync_time`.") from exc

        if last_sync is not None:
            params["last_sync_time"] = last_sync

        r = self.session.send(
            amazon_api.signed_request(
                "GET",
                url,
                tokens=self.tokens,
            )
        )
        library = xmltodict.parse(r.text)
        library = json.loads(json.dumps(library))
        library = library["response"]["add_update_list"]
        ebooks = [i for i in library["meta_data"] if i["cde_contenttype"] == "EBOK"]
        pdocs = [i for i in library["meta_data"] if i["cde_contenttype"] == "PDOC"]
        ebooks = [e for e in ebooks if e["origins"]["origin"]["type"] == "Purchase"]
        unknown_index = 1
        for i in pdocs + ebooks:
            if isinstance(i["title"], dict):
                if i["ASIN"] in self.library_dict:
                    unknown_index += 1
                self.library_dict[i["ASIN"]] = i["title"].get(
                    "#text", str(unknown_index)
                )
            else:
                self.library_dict[i["ASIN"]] = i["title"]

        self.ebooks = ebooks
        self.pdocs = pdocs

    def sidecar_ebook(self, asin):
        url = f"https://sars.amazon.com/sidecar/sa/EBOK/{asin}"
        r = self.session.send(
            amazon_api.signed_request(
                "GET",
                url,
                tokens=self.tokens,
            )
        )
        print(r.json())

    @staticmethod
    def _b64ion_to_dict(b64ion: str):
        ion = base64.b64decode(b64ion)
        ion = simpleion.loads(ion)
        return dict(ion)

    def get_book(self, asin):
        manifest_resp = self.session.send(
            amazon_api.signed_request(
                "GET",
                API_MANIFEST_URL + asin.upper(),
                asin=asin,
                tokens=self.tokens,
                request_type="manifest",
            )
        )
        try:
            resources = manifest_resp.json()["resources"]
        except Exception as e:
            print(manifest_resp.json(), str(e))
            return None, False, str(e)
        manifest = manifest_resp.json()
        # azw3 is not so hard
        drm_voucher_list = [
            resource for resource in resources if resource["type"] == "DRM_VOUCHER"
        ]
        if not drm_voucher_list:
            return manifest, False, "Succeed"

        drm_voucher = drm_voucher_list[0]
        try:
            self.drm_voucher = self.decrypt_voucher(
                self.get_resource(drm_voucher, asin)[0]
            )
        except Exception:
            print("Could not decrypt the drm voucher!")

        manifest["responseContext"] = self._b64ion_to_dict(manifest["responseContext"])
        for resource in manifest["resources"]:
            if "responseContext" in resource:
                resource["responseContext"] = self._b64ion_to_dict(
                    resource["responseContext"]
                )
        return manifest, True, "Succeed"

    def download_book(self, asin, error=None):
        manifest, is_kfx, info = self.get_book(asin)
        if not manifest:
            print(f"Failed to download ASIN: {asin}, error: {str(info)}")
            return
        if is_kfx:
            self._download_kfx(manifest, asin)
        else:
            self._download_azw(manifest, asin)

    def _download_kfx(self, manifest, asin):
        resources = manifest["resources"]
        parts = []
        scope = Scope.DEFERRED
        if isinstance(scope, str):
            try:
                scope = Scope[scope.upper()]
            except KeyError:
                allowed_scopes = [s.name.lower() for s in Scope]
                raise ValueError(
                    "Scope must be in %s, got %s" % (", ".join(allowed_scopes), scope)
                )
        for resource in resources:
            if not scope.should_download(resource["requirement"]):
                continue
            try:
                url = (
                    resource.get("optimalEndpoint", {}).get("directUrl")
                    or resource.get("endpoint")["url"]
                )
            except KeyError:
                raise RuntimeError("No url found for item with id %s." % resource["id"])
            headers = {}
            fn = None

            if resource["type"] == "DRM_VOUCHER":
                fn = resource["id"] + ".voucher"
                correlation_id = _build_correlation_id(
                    "A2A33MVZVPQKHY",
                    self.tokens["device_id"],
                    asin=manifest["content"]["id"],
                    timestamp=manifest["responseContext"]["manifestTime"],
                )

                headers = {
                    "User-Agent": "Kindle/1.0.235280.0.10 CFNetwork/1220.1 Darwin/20.3.0",
                    "X-ADP-AttemptCount": "1",
                    "X-ADP-CorrelationId": correlation_id,
                    "X-ADP-Transport": str(manifest["responseContext"]["transport"]),
                    "X-ADP-Reason": str(manifest["responseContext"]["reason"]),
                    "x-amzn-accept-type": "application/x.amzn.digital.deliverymanifest@1.0",
                    "X-ADP-SW": str(manifest["responseContext"]["swVersion"]),
                    "X-ADP-LTO": "60",
                    "Accept": "application/x-com.amazon.drm.Voucher@1.0",
                }
                if "country" in manifest["responseContext"]:
                    headers["X-ADP-Country"] = str(
                        manifest["responseContext"]["country"]
                    )

                url += "&supportedVoucherVersions=V1"
            elif resource["type"] == "KINDLE_MAIN_BASE":
                fn = manifest["content"]["id"] + "_EBOK.azw"
            elif resource["type"] == "KINDLE_MAIN_METADATA":
                fn = resource["id"] + ".azw.md"
            elif resource["type"] == "KINDLE_MAIN_ATTACHABLE":
                fn = resource["id"] + ".azw.res"
            elif resource["type"] == "KINDLE_USER_ANOT":
                fn = manifest["content"]["id"] + "_EBOK.mbpV2"

            parts.append(Request(method="GET", url=url, fn=fn, headers=headers))

        files = []
        for part in parts:
            r = self.session.send(
                amazon_api.signed_request(
                    part.method,
                    part.url,
                    asin=asin,
                    tokens=self.tokens,
                    headers=part.headers,
                )
            )
            fn = part.fn

            if fn is None:
                cd = r.headers.get("content-disposition")
                fn = re.findall('filename="(.+)"', cd)
                fn = fn[0]
            fn = pathlib.Path(self.out_dir) / fn  # pathlib, so write_bytes below works

            files.append(fn)
            fn.write_bytes(r.content)
            print(f"Book part successfully saved to {fn}")

        asin = manifest["content"]["id"].upper()
        manifest_file = pathlib.Path(f"{asin}.manifest")
        manifest_json_data = json.dumps(manifest)
        manifest_file.write_text(manifest_json_data)
        files.append(manifest_file)
        name = self.library_dict.get(asin)
||||
if len(name) > self.cut_length:
|
||||
name = name[: self.cut_length - 10]
|
||||
fn = name + "_" + asin + "_EBOK.kfx-zip"
|
||||
fn = os.path.join(self.out_dir, fn)
|
||||
out_epub = os.path.join(self.out_epub_dir, name.split(".")[0] + ".epub")
|
||||
with ZipFile(fn, "w") as myzip:
|
||||
for file in files:
|
||||
myzip.write(file)
|
||||
file.unlink()
|
||||
|
||||
fn_dec = name + "_" + asin + "_EBOK.kfx-zip.tmp"
|
||||
fn_dec = os.path.join(name + "_" + asin + "_EBOK.kfx-zip.tmp")
|
||||
kfx_book = KFXZipBook(fn, self.tokens["device_id"])
|
||||
kfx_book.voucher = self.drm_voucher
|
||||
kfx_book.processBook()
|
||||
kfx_book.getFile(fn_dec)
|
||||
pathlib.Path(fn).unlink()
|
||||
pathlib.Path(fn_dec).rename(fn)
|
||||
b = YJ_Book(fn)
|
||||
epub_data = b.convert_to_epub()
|
||||
with open(out_epub, "wb") as f:
|
||||
f.write(epub_data)
|
||||
|
||||
def _download_azw(self, manifest, asin):
|
||||
resources = manifest["resources"]
|
||||
url = resources[0]["endpoint"]["url"]
|
||||
r = self.session.send(
|
||||
amazon_api.signed_request(
|
||||
"GET",
|
||||
url,
|
||||
asin=asin,
|
||||
tokens=self.tokens,
|
||||
)
|
||||
)
|
||||
name = self.library_dict.get(asin)
|
||||
if len(name) > self.cut_length:
|
||||
name = name[: self.cut_length - 10]
|
||||
out = os.path.join(self.out_dir, name + ".azw3")
|
||||
out_epub = os.path.join(self.out_epub_dir, name + ".epub")
|
||||
|
||||
with open(out, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=512):
|
||||
f.write(chunk)
|
||||
out_dedrm = os.path.join(self.out_dedrm_dir, name)
|
||||
time.sleep(1)
|
||||
mb = MobiBook(out)
|
||||
md1, md2 = mb.get_pid_meta_info()
|
||||
totalpids = get_pid_list(md1, md2, [self.device_serial_number], [])
|
||||
totalpids = list(set(totalpids))
|
||||
mb.make_drm_file(totalpids, out_dedrm)
|
||||
time.sleep(1)
|
||||
# save to EPUB
|
||||
epub_dir, epub_file = extract(out_dedrm)
|
||||
print(epub_file)
|
||||
shutil.copy2(epub_file, out_epub)
|
||||
# delete it
|
||||
shutil.rmtree(epub_dir)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
kindle = NoKindle()
|
||||
kindle.make_library()
|
||||
for e in kindle.ebooks:
|
||||
try:
|
||||
# if e['ASIN'] == 'B01C7CFR5G':
|
||||
if 1:
|
||||
kindle.download_book(e["ASIN"])
|
||||
else:
|
||||
print(f"Pass: {e['ASIN']}")
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
print(e)
|
||||
# spider rule
|
||||
time.sleep(1)
|
||||
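# A minimal usage sketch (not part of this commit) of the flow above; the
# import path is an assumption, the class and method names are as defined here.
from kindle_download_helper.no_kindle import NoKindle  # assumed module location

nk = NoKindle()
nk.make_library()  # populates nk.ebooks, nk.pdocs and nk.library_dict
for book in nk.ebooks[:5]:
    print(book["ASIN"], nk.library_dict.get(book["ASIN"]))
nk.download_book(nk.ebooks[0]["ASIN"])  # saves the book and converts it to EPUB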
1259  kindle_download_helper/third_party/ion.py  (vendored, new file)
File diff suppressed because it is too large
36  kindle_download_helper/third_party/kfxlib/__init__.py  (vendored, new file)
@@ -0,0 +1,36 @@
#! /usr/bin/python3

from __future__ import absolute_import, division, print_function, unicode_literals

from . import message_logging, utilities, yj_book, yj_metadata

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


set_logger = message_logging.set_logger
YJ_Book = yj_book.YJ_Book
YJ_Metadata = yj_metadata.YJ_Metadata
KFXDRMError = utilities.KFXDRMError


clean_message = utilities.clean_message
file_read_binary = utilities.file_read_binary
file_write_binary = utilities.file_write_binary
file_read_utf8 = utilities.file_read_utf8
file_write_utf8 = utilities.file_write_utf8
json_deserialize = utilities.json_deserialize
json_serialize = utilities.json_serialize
unicode_argv = utilities.unicode_argv
windows_long_path_fix = utilities.windows_long_path_fix

IS_LINUX = utilities.IS_LINUX
IS_MACOS = utilities.IS_MACOS
IS_WINDOWS = utilities.IS_WINDOWS

user_home_dir = utilities.user_home_dir
windows_user_dir = utilities.windows_user_dir

locale_encode = utilities.locale_encode
locale_decode = utilities.locale_decode
os_environ_get = utilities.os_environ_get
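# A short sketch of the flat API these re-exports provide; the file names are
# placeholders, and the call mirrors how YJ_Book is used by the downloader above.
from kindle_download_helper.third_party.kfxlib import YJ_Book, KFXDRMError

try:
    epub_data = YJ_Book("example_EBOK.kfx-zip").convert_to_epub()  # EPUB bytes
    with open("example.epub", "wb") as f:
        f.write(epub_data)
except KFXDRMError:
    print("book is still DRM protected; decrypt its voucher first")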
1760  kindle_download_helper/third_party/kfxlib/epub_output.py  (vendored, new file)
File diff suppressed because it is too large
439  kindle_download_helper/third_party/kfxlib/ion.py  (vendored, new file)
@@ -0,0 +1,439 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import collections
import datetime
import decimal
import math
import re

from .message_logging import log
from .python_transition import IS_PYTHON2, bytes_to_hex
from .utilities import sha1, type_name

if IS_PYTHON2:
    from .python_transition import repr, str
else:
    long = int


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


LARGE_DATA_SIZE = 256
MAX_ASCII_DATA_SIZE = 10000


IonBool = bool
IonDecimal = decimal.Decimal
IonFloat = float
IonInt = int
IonList = list
IonNull = type(None)
IonString = str


def ion_type(value):
    t = type(value)
    if t in ION_TYPES:
        return t

    if isinstance(value, IonAnnotation):
        return IonAnnotation

    if isinstance(value, IonList) and not isinstance(value, IonSExp):
        return IonList

    if isinstance(value, long):
        return IonInt

    raise Exception("Data has non-Ion type %s: %s" % (type_name(value), repr(value)))


def isstring(value):
    return isinstance(value, str) and not isinstance(value, IonSymbol)


class IonAnnotation(object):
    def __init__(self, annotations, value):
        self.annotations = (
            annotations
            if isinstance(annotations, IonAnnots)
            else IonAnnots(annotations)
        )

        if isinstance(value, IonAnnotation):
            raise Exception("IonAnnotation cannot be annotated")

        self.value = value

    def __repr__(self):
        return "%s %s" % (repr(self.annotations), repr(self.value))

    def __str__(self):
        return repr(self.annotations)

    def is_single(self):
        return len(self.annotations) == 1

    def has_annotation(self, annotation):
        return annotation in self.annotations

    def is_annotation(self, annotation):
        return self.is_single() and self.annotations[0] == annotation

    def get_annotation(self):
        if not self.is_single():
            raise Exception(
                "get_annotation expected single annotation, found %s"
                % repr(self.annotations)
            )

        return self.annotations[0]

    def verify_annotation(self, annotation):
        if not self.is_annotation(annotation):
            raise Exception(
                "Expected annotation %s, found %s"
                % (repr(annotation), repr(self.annotations))
            )

        return self


class IonAnnots(tuple):
    def __new__(cls, annotations):
        annots = tuple.__new__(cls, annotations)

        if len(annots) == 0:
            raise Exception("IonAnnotation cannot be empty")

        for a in annots:
            if not isinstance(a, IonSymbol):
                raise Exception("IonAnnotation must be IonSymbol: %s" % repr(a))

        return annots

    def __repr__(self):
        return " ".join(["%s::" % repr(a) for a in self])


class IonBLOB(bytes):
    def __eq__(self, other):
        if other is None:
            return False

        if not isinstance(other, (IonBLOB, bytes)):
            raise Exception("IonBLOB __eq__: comparing with %s" % type_name(other))

        return bytes(self) == bytes(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        raise Exception("IonBLOB __lt__ not implemented")

    def __le__(self, other):
        raise Exception("IonBLOB __le__ not implemented")

    def __gt__(self, other):
        raise Exception("IonBLOB __gt__ not implemented")

    def __ge__(self, other):
        raise Exception("IonBLOB __ge__ not implemented")

    def __repr__(self):
        return "*** %d byte BLOB %s ***" % (len(self), bytes_to_hex(sha1(self)))

    def ascii_data(self):
        if len(self) > MAX_ASCII_DATA_SIZE:
            return None

        try:
            data = self.decode("ascii")
        except UnicodeDecodeError:
            return None

        for c in data:
            o = ord(c)
            if (o < 32 and o not in [9, 10, 13]) or o >= 127:
                return None

        return data

    def is_large(self):
        return len(self) >= LARGE_DATA_SIZE and self.ascii_data() is None

    def tobytes(self):
        return bytes(self)


class IonCLOB(bytes):
    def tobytes(self):
        return bytes(self)


class IonNop(object):
    pass


class IonSExp(list):
    def __repr__(self):
        return "(%s)" % (", ".join([repr(v) for v in self]))

    def tolist(self):
        return list(self)


class IonStruct(collections.OrderedDict):
    def __init__(self, *args):
        if len(args) == 1:
            collections.OrderedDict.__init__(self, args[0])
            return

        collections.OrderedDict.__init__(self)
        if len(args) % 2 != 0:
            raise Exception("IonStruct created with %d arguments" % len(args))

        for i in range(0, len(args), 2):
            self[args[i]] = args[i + 1]

    def __repr__(self):
        return "{%s}" % (
            ", ".join(["%s: %s" % (repr(k), repr(v)) for k, v in self.items()])
        )

    def todict(self):
        return collections.OrderedDict(self)


class IonSymbol(str):
    def __repr__(self):
        if re.match(r"^[\u0021-\u007e]+$", self):
            return str(self)

        return "`%s`" % self

    def tostring(self):
        return str(self)


IS = IonSymbol


class IonTimestamp(datetime.datetime):
    def __repr__(self):
        value = self

        if isinstance(value.tzinfo, IonTimestampTZ):
            format = value.tzinfo.format()
            format = format.replace(
                "%f", ("%06d" % value.microsecond)[: value.tzinfo.fraction_len()]
            )

            if value.year < 1900:
                format = format.replace("%Y", "%04d" % value.year)
                value = value.replace(year=1900)

            return value.strftime(format) + (
                value.tzname() if value.tzinfo.present() else ""
            )

        return value.isoformat()


ION_TIMESTAMP_Y = "%YT"
ION_TIMESTAMP_YM = "%Y-%mT"
ION_TIMESTAMP_YMD = "%Y-%m-%d"
ION_TIMESTAMP_YMDHM = "%Y-%m-%dT%H:%M"
ION_TIMESTAMP_YMDHMS = "%Y-%m-%dT%H:%M:%S"
ION_TIMESTAMP_YMDHMSF = "%Y-%m-%dT%H:%M:%S.%f"


class IonTimestampTZ(datetime.tzinfo):
    def __init__(self, offset, format, fraction_len):
        datetime.tzinfo.__init__(self)
        self.__offset = offset
        self.__format = format
        self.__fraction_len = fraction_len
        self.__present = format in {
            ION_TIMESTAMP_YMDHM,
            ION_TIMESTAMP_YMDHMS,
            ION_TIMESTAMP_YMDHMSF,
        }

        if offset and not self.__present:
            raise Exception(
                "IonTimestampTZ has offset %d with non-present format" % offset
            )

        if offset and (offset < -1439 or offset > 1439):
            raise Exception("IonTimestampTZ has invalid offset %d" % offset)

        if fraction_len < 0 or fraction_len > 6:
            raise Exception("IonTimestampTZ has invalid fraction len %d" % fraction_len)

        if fraction_len and format != ION_TIMESTAMP_YMDHMSF:
            raise Exception(
                "IonTimestampTZ has fraction len %d without fraction in format"
                % fraction_len
            )

    def utcoffset(self, dt):
        return datetime.timedelta(minutes=(self.__offset or 0))

    def tzname(self, dt):
        if self.__offset is None:
            name = "-00:00"
        elif self.__offset == 0:
            name = "Z"
        else:
            name = "%s%02d:%02d" % (
                "+" if self.__offset >= 0 else "-",
                abs(self.__offset) // 60,
                abs(self.__offset) % 60,
            )

        return name.encode("ascii") if IS_PYTHON2 else name

    def dst(self, dt):
        return datetime.timedelta(0)

    def offset_minutes(self):
        return self.__offset

    def format(self):
        return self.__format

    def present(self):
        return self.__present

    def fraction_len(self):
        return self.__fraction_len

    def __eq__(self, other):
        if not isinstance(other, IonTimestampTZ):
            raise Exception(
                "IonTimestampTZ __eq__: comparing with %s" % type_name(other)
            )

        return (self.__offset, self.__format, self.__fraction_len) == (
            other.__offset,
            other.__format,
            other.__fraction_len,
        )

    def __ne__(self, other):
        return not self.__eq__(other)

    def __copy__(self):
        return self

    def __deepcopy__(self, memo):
        return self


ION_TYPES = {
    IonAnnotation,
    IonBool,
    IonBLOB,
    IonCLOB,
    IonDecimal,
    IonFloat,
    IonInt,
    IonList,
    IonNull,
    IonSExp,
    IonString,
    IonStruct,
    IonSymbol,
    IonTimestamp,
}


def unannotated(value):
    return value.value if isinstance(value, IonAnnotation) else value


def ion_data_eq(f1, f2, msg="Ion data mismatch", report_errors=True):
    def ion_data_eq_(f1, f2, ctx):
        data_type = ion_type(f1)

        if ion_type(f2) is not data_type:
            ctx.append("type mismatch: %s != %s" % (type_name(f1), type_name(f2)))
            return False

        if data_type is IonAnnotation:
            if not ion_data_eq_(IonList(f1.annotations), IonList(f2.annotations), ctx):
                ctx.append("IonAnnotation")
                return False

            if not ion_data_eq_(f1.value, f2.value, ctx):
                ctx.append("in IonAnnotation %s" % repr(f1))
                return False

            return True

        if data_type in [IonList, IonSExp]:
            if len(f1) != len(f2):
                ctx.append("%s length %d != %d" % (type_name(f1), len(f1), len(f2)))
                return False

            for i, (d1, d2) in enumerate(zip(f1, f2)):
                if not ion_data_eq_(d1, d2, ctx):
                    ctx.append("at %s index %d" % (type_name(f1), i))
                    return False

            return True

        if data_type is IonStruct:
            if len(f1) != len(f2):
                ctx.append("IonStruct length %d != %d" % (len(f1), len(f2)))
                return False

            for f1k, f1v in f1.items():
                if f1k not in f2:
                    ctx.append("IonStruct key %s missing" % f1k)
                    return False

                if not ion_data_eq_(f1v, f2[f1k], ctx):
                    ctx.append("at IonStruct key %s" % f1k)
                    return False

            return True

        if data_type is IonFloat and math.isnan(f1) and math.isnan(f2):
            return True

        if f1 != f2 or repr(f1) != repr(f2):
            ctx.append("value %s != %s" % (repr(f1), repr(f2)))
            return False

        return True

    ctx = []
    success = ion_data_eq_(f1, f2, ctx)

    if report_errors and not success:
        log.error("%s: %s" % (msg, ", ".join(ctx[::-1])))

    return success


def filtered_IonList(ion_list, omit_large_blobs=False):
    if not omit_large_blobs:
        return ion_list

    filtered = []
    for val in ion_list[:]:
        if (
            ion_type(val) is IonAnnotation
            and ion_type(val.value) is IonBLOB
            and val.value.is_large()
        ):
            val = IonAnnotation(val.annotations, repr(val.value))

        filtered.append(val)

    return filtered
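# A small sketch of how these wrapper types compose (run inside this module, or
# import IonStruct, IonAnnotation, IS and ion_data_eq from it):
s1 = IonAnnotation([IS("book")], IonStruct(IS("title"), "example", IS("pages"), 100))
s2 = IonAnnotation([IS("book")], IonStruct(IS("title"), "example", IS("pages"), 100))
print(ion_data_eq(s1, s2))  # True; a mismatch would be logged via log.error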
734  kindle_download_helper/third_party/kfxlib/ion_binary.py  (vendored, new file)
@@ -0,0 +1,734 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import decimal
import struct

from .ion import (
    ION_TIMESTAMP_Y,
    ION_TIMESTAMP_YM,
    ION_TIMESTAMP_YMD,
    ION_TIMESTAMP_YMDHM,
    ION_TIMESTAMP_YMDHMS,
    ION_TIMESTAMP_YMDHMSF,
    IonAnnotation,
    IonBLOB,
    IonBool,
    IonCLOB,
    IonDecimal,
    IonFloat,
    IonInt,
    IonList,
    IonNop,
    IonNull,
    IonSExp,
    IonString,
    IonStruct,
    IonSymbol,
    IonTimestamp,
    IonTimestampTZ,
    ion_type,
)
from .ion_text import IonSerial
from .message_logging import log
from .python_transition import IS_PYTHON2, bytes_, bytes_indexed
from .utilities import Deserializer, Serializer, bytes_to_separated_hex

if IS_PYTHON2:
    from .python_transition import repr


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


DEBUG = False


class IonBinary(IonSerial):
    MAJOR_VERSION = 1
    MINOR_VERSION = 0

    VERSION_MARKER = 0xE0

    SIGNATURE = bytes_([VERSION_MARKER, MAJOR_VERSION, MINOR_VERSION, 0xEA])

    def deserialize_multiple_values(
        self, data, import_symbols=False, with_offsets=False
    ):
        values = self.deserialize_multiple_values_(data, import_symbols, with_offsets)

        return values

    SORTED_STRUCT_FLAG = 1
    VARIABLE_LEN_FLAG = 14
    NULL_FLAG = 15

    def serialize_multiple_values_(self, values):
        serial = Serializer()
        serial.append(IonBinary.SIGNATURE)

        for value in values:
            serial.append(self.serialize_value(value))

        return serial.serialize()

    def deserialize_multiple_values_(self, data, import_symbols, with_offsets):
        if DEBUG:
            log.debug("decoding: %s" % bytes_to_separated_hex(data[:1000]))

        serial = Deserializer(data)
        self.import_symbols = import_symbols

        ion_signature = serial.extract(4)
        if ion_signature != IonBinary.SIGNATURE:
            raise Exception(
                "Ion signature is incorrect (%s)"
                % bytes_to_separated_hex(ion_signature)
            )

        result = []
        while len(serial):
            if serial.extract(1, advance=False) == IonBinary.VERSION_MARKER:
                ion_signature = serial.unpack("4s")
                if ion_signature != IonBinary.SIGNATURE:
                    raise Exception(
                        "Embedded Ion signature is incorrect (%s)"
                        % bytes_to_separated_hex(ion_signature)
                    )
            else:
                value_offset = serial.offset
                value = self.deserialize_value(serial)

                if self.import_symbols and isinstance(value, IonAnnotation):
                    if value.is_annotation("$ion_symbol_table"):
                        self.symtab.create(value.value)
                    elif value.is_annotation("$ion_shared_symbol_table"):
                        self.symtab.catalog.create_shared_symbol_table(value.value)

                if not isinstance(value, IonNop):
                    result.append(
                        [value_offset, serial.offset - value_offset, value]
                        if with_offsets
                        else value
                    )

        return result

    def serialize_value(self, value):
        handler = IonBinary.ION_TYPE_HANDLERS[ion_type(value)]
        signature, data = handler(self, value)

        if signature is None:
            return data

        length = len(data)

        if length < IonBinary.VARIABLE_LEN_FLAG:
            return descriptor(signature, length) + data

        return (
            descriptor(signature, IonBinary.VARIABLE_LEN_FLAG)
            + serialize_vluint(length)
            + data
        )

    def deserialize_value(self, serial):
        descriptor = serial.unpack("B")
        if descriptor == IonBinary.VERSION_MARKER:
            raise Exception("Unexpected Ion version marker within data stream")

        signature = descriptor >> 4
        flag = descriptor & 0x0F
        if DEBUG:
            log.debug(
                "IonBinary 0x%02x: signature=%d flag=%d data=%s"
                % (
                    descriptor,
                    signature,
                    flag,
                    bytes_to_separated_hex(serial.extract(advance=False)[:16]),
                )
            )

        extract_data, deserializer, name = IonBinary.VALUE_DESERIALIZERS[signature]

        if flag == IonBinary.NULL_FLAG and signature != IonBinary.NULL_VALUE_SIGNATURE:
            log.error("IonBinary: Deserialized null of type %s" % name)
            extract_data, deserializer, name = IonBinary.VALUE_DESERIALIZERS[
                IonBinary.NULL_VALUE_SIGNATURE
            ]

        if extract_data:
            length = (
                deserialize_vluint(serial)
                if flag == IonBinary.VARIABLE_LEN_FLAG
                else flag
            )
            return deserializer(self, serial.extract(length))

        return deserializer(self, flag, serial)

    NULL_VALUE_SIGNATURE = 0

    def serialize_null_value(self, value):
        return (None, descriptor(IonBinary.NULL_VALUE_SIGNATURE, IonBinary.NULL_FLAG))

    def deserialize_null_value(self, flag, serial):
        if flag == IonBinary.NULL_FLAG:
            return None

        length = (
            deserialize_vluint(serial) if flag == IonBinary.VARIABLE_LEN_FLAG else flag
        )
        serial.extract(length)
        return IonNop()

    BOOL_VALUE_SIGNATURE = 1

    def serialize_bool_value(self, value):
        return (None, descriptor(IonBinary.BOOL_VALUE_SIGNATURE, 1 if value else 0))

    def deserialize_bool_value(self, flag, serial):
        if flag > 1:
            raise Exception("BinaryIonBool: Unknown IonBool flag value: %d" % flag)

        return flag != 0

    def serialize_int_value(self, value):
        return (
            (IonBinary.POSINT_VALUE_SIGNATURE, serialize_unsignedint(value))
            if value >= 0
            else (IonBinary.NEGINT_VALUE_SIGNATURE, serialize_unsignedint(-value))
        )

    POSINT_VALUE_SIGNATURE = 2

    def deserialize_posint_value(self, data):
        return deserialize_unsignedint(data)

    NEGINT_VALUE_SIGNATURE = 3

    def deserialize_negint_value(self, data):
        if len(data) == 0:
            log.error("BinaryIonNegInt has no data")

        if bytes_indexed(data, 0) == 0:
            log.error(
                "BinaryIonNegInt data starts with 0x00: %s"
                % bytes_to_separated_hex(data)
            )

        return -deserialize_unsignedint(data)

    FLOAT_VALUE_SIGNATURE = 4

    def serialize_float_value(self, value):
        return (
            IonBinary.FLOAT_VALUE_SIGNATURE,
            b"" if value == 0.0 else struct.pack(">d", value),
        )

    def deserialize_float_value(self, data):
        if len(data) == 0:
            return float(0.0)

        if len(data) == 4:
            return struct.unpack_from(">f", data)[0]

        if len(data) == 8:
            return struct.unpack_from(">d", data)[0]

        raise Exception(
            "IonFloat unexpected data length: %s" % bytes_to_separated_hex(data)
        )

    DECIMAL_VALUE_SIGNATURE = 5

    def serialize_decimal_value(self, value):
        if value.is_zero():
            return (IonBinary.DECIMAL_VALUE_SIGNATURE, b"")

        vt = value.as_tuple()
        return (
            IonBinary.DECIMAL_VALUE_SIGNATURE,
            serialize_vlsint(vt.exponent)
            + serialize_signedint(combine_decimal_digits(vt.digits, vt.sign)),
        )

    def deserialize_decimal_value(self, data):
        if len(data) == 0:
            return decimal.Decimal(0)

        serial = Deserializer(data)
        exponent = deserialize_vlsint(serial)
        magnitude = deserialize_signedint(serial.extract())
        return decimal.Decimal(magnitude) * (decimal.Decimal(10) ** exponent)

    TIMESTAMP_VALUE_SIGNATURE = 6

    def serialize_timestamp_value(self, value):
        serial = Serializer()

        if isinstance(value.tzinfo, IonTimestampTZ):
            offset_minutes = value.tzinfo.offset_minutes()
            format_len = len(value.tzinfo.format())
            fraction_exponent = -value.tzinfo.fraction_len()
        else:
            offset_minutes = (
                int(value.utcoffset().total_seconds()) // 60
                if value.utcoffset() is not None
                else None
            )
            format_len = len(ION_TIMESTAMP_YMDHMSF)
            fraction_exponent = -3

        serial.append(serialize_vlsint(offset_minutes))
        serial.append(serialize_vluint(value.year))

        if format_len >= len(ION_TIMESTAMP_YM):
            serial.append(serialize_vluint(value.month))

        if format_len >= len(ION_TIMESTAMP_YMD):
            serial.append(serialize_vluint(value.day))

        if format_len >= len(ION_TIMESTAMP_YMDHM):
            serial.append(serialize_vluint(value.hour))
            serial.append(serialize_vluint(value.minute))

        if format_len >= len(ION_TIMESTAMP_YMDHMS):
            serial.append(serialize_vluint(value.second))

        if format_len >= len(ION_TIMESTAMP_YMDHMSF):
            serial.append(serialize_vlsint(fraction_exponent))
            serial.append(
                serialize_signedint(
                    (value.microsecond * int(10**-fraction_exponent))
                    // 1000000
                )
            )

        return (IonBinary.TIMESTAMP_VALUE_SIGNATURE, serial.serialize())

    def deserialize_timestamp_value(self, data):
        serial = Deserializer(data)

        offset_minutes = deserialize_vlsint(serial, allow_minus_zero=True)
        year = deserialize_vluint(serial)
        month = deserialize_vluint(serial) if len(serial) > 0 else None
        day = deserialize_vluint(serial) if len(serial) > 0 else None
        hour = deserialize_vluint(serial) if len(serial) > 0 else None
        minute = deserialize_vluint(serial) if len(serial) > 0 else None
        second = deserialize_vluint(serial) if len(serial) > 0 else None

        if len(serial) > 0:
            fraction_exponent = deserialize_vlsint(serial)
            fraction_coefficient = (
                deserialize_signedint(serial.extract()) if len(serial) > 0 else 0
            )

            if fraction_coefficient == 0 and fraction_exponent > -1:
                microsecond = None
            else:
                if fraction_exponent < -6 or fraction_exponent > -1:
                    log.error(
                        "Unexpected IonTimestamp fraction exponent %d coefficient %d: %s"
                        % (
                            fraction_exponent,
                            fraction_coefficient,
                            bytes_to_separated_hex(data),
                        )
                    )

                microsecond = (fraction_coefficient * 1000000) // int(
                    10**-fraction_exponent
                )

                if microsecond < 0 or microsecond > 999999:
                    log.error(
                        "Incorrect IonTimestamp fraction %d usec: %s"
                        % (microsecond, bytes_to_separated_hex(data))
                    )
                    microsecond = None
                    fraction_exponent = 0
        else:
            microsecond = None
            fraction_exponent = 0

        if month is None:
            format = ION_TIMESTAMP_Y
            offset_minutes = None
        elif day is None:
            format = ION_TIMESTAMP_YM
            offset_minutes = None
        elif hour is None:
            format = ION_TIMESTAMP_YMD
            offset_minutes = None
        elif second is None:
            format = ION_TIMESTAMP_YMDHM
        elif microsecond is None:
            format = ION_TIMESTAMP_YMDHMS
        else:
            format = ION_TIMESTAMP_YMDHMSF

        return IonTimestamp(
            year,
            month if month is not None else 1,
            day if day is not None else 1,
            hour if hour is not None else 0,
            minute if hour is not None else 0,
            second if second is not None else 0,
            microsecond if microsecond is not None else 0,
            IonTimestampTZ(offset_minutes, format, -fraction_exponent),
        )

    SYMBOL_VALUE_SIGNATURE = 7

    def serialize_symbol_value(self, value):
        symbol_id = self.symtab.get_id(value)
        if not symbol_id:
            raise Exception("attempt to serialize undefined symbol %s" % repr(value))

        return (IonBinary.SYMBOL_VALUE_SIGNATURE, serialize_unsignedint(symbol_id))

    def deserialize_symbol_value(self, data):
        return self.symtab.get_symbol(deserialize_unsignedint(data))

    STRING_VALUE_SIGNATURE = 8

    def serialize_string_value(self, value):
        return (IonBinary.STRING_VALUE_SIGNATURE, value.encode("utf-8"))

    def deserialize_string_value(self, data):
        return data.decode("utf-8")

    CLOB_VALUE_SIGNATURE = 9

    def serialize_clob_value(self, value):
        log.error("Serialize CLOB")
        return (IonBinary.CLOB_VALUE_SIGNATURE, bytes(value))

    def deserialize_clob_value(self, data):
        log.error("Deserialize CLOB")
        return IonCLOB(data)

    BLOB_VALUE_SIGNATURE = 10

    def serialize_blob_value(self, value):
        return (IonBinary.BLOB_VALUE_SIGNATURE, bytes(value))

    def deserialize_blob_value(self, data):
        return IonBLOB(data)

    LIST_VALUE_SIGNATURE = 11

    def serialize_list_value(self, value):
        serial = Serializer()
        for val in value:
            serial.append(self.serialize_value(val))

        return (IonBinary.LIST_VALUE_SIGNATURE, serial.serialize())

    def deserialize_list_value(self, data, top_level=False):
        serial = Deserializer(data)
        result = []
        while len(serial):
            value = self.deserialize_value(serial)

            if not isinstance(value, IonNop):
                result.append(value)

        return result

    SEXP_VALUE_SIGNATURE = 12

    def serialize_sexp_value(self, value):
        return (
            IonBinary.SEXP_VALUE_SIGNATURE,
            self.serialize_list_value(list(value))[1],
        )

    def deserialize_sexp_value(self, data):
        return IonSExp(self.deserialize_list_value(data))

    STRUCT_VALUE_SIGNATURE = 13

    def serialize_struct_value(self, value):
        serial = Serializer()

        for key, val in value.items():
            serial.append(serialize_vluint(self.symtab.get_id(key)))
            serial.append(self.serialize_value(val))

        return (IonBinary.STRUCT_VALUE_SIGNATURE, serial.serialize())

    def deserialize_struct_value(self, flag, serial):
        if flag == IonBinary.SORTED_STRUCT_FLAG:
            log.error("BinaryIonStruct: Sorted IonStruct encountered")
            flag = IonBinary.VARIABLE_LEN_FLAG

        serial2 = Deserializer(
            serial.extract(
                deserialize_vluint(serial)
                if flag == IonBinary.VARIABLE_LEN_FLAG
                else flag
            )
        )
        result = IonStruct()

        while len(serial2):
            id_symbol = self.symtab.get_symbol(deserialize_vluint(serial2))

            value = self.deserialize_value(serial2)
            if DEBUG:
                log.debug("IonStruct: %s = %s" % (repr(id_symbol), repr(value)))

            if not isinstance(value, IonNop):
                if id_symbol in result:
                    log.error("BinaryIonStruct: Duplicate field name %s" % id_symbol)

                result[id_symbol] = value

        return result

    ANNOTATION_VALUE_SIGNATURE = 14

    def serialize_annotation_value(self, value):
        if not value.annotations:
            raise Exception("Serializing IonAnnotation without annotations")

        serial = Serializer()

        annotation_data = Serializer()
        for annotation in value.annotations:
            annotation_data.append(serialize_vluint(self.symtab.get_id(annotation)))

        serial.append(serialize_vluint(len(annotation_data)))
        serial.append(annotation_data.serialize())

        serial.append(self.serialize_value(value.value))

        return (IonBinary.ANNOTATION_VALUE_SIGNATURE, serial.serialize())

    def deserialize_annotation_value(self, data):
        serial = Deserializer(data)

        annotation_length = deserialize_vluint(serial)
        annotation_data = Deserializer(serial.extract(annotation_length))

        ion_value = self.deserialize_value(serial)
        if len(serial):
            raise Exception(
                "IonAnnotation has excess data: %s"
                % bytes_to_separated_hex(serial.extract())
            )

        annotations = []
        while len(annotation_data):
            annotations.append(
                self.symtab.get_symbol(deserialize_vluint(annotation_data))
            )

        if len(annotations) == 0:
            raise Exception("IonAnnotation has no annotations")

        return IonAnnotation(annotations, ion_value)

    RESERVED_VALUE_SIGNATURE = 15

    def deserialize_reserved_value(self, data):
        raise Exception(
            "Deserialize reserved ion value signature %d" % self.value_signature
        )

    VALUE_DESERIALIZERS = {
        NULL_VALUE_SIGNATURE: (False, deserialize_null_value, "null"),
        BOOL_VALUE_SIGNATURE: (False, deserialize_bool_value, "bool"),
        POSINT_VALUE_SIGNATURE: (True, deserialize_posint_value, "int"),
        NEGINT_VALUE_SIGNATURE: (True, deserialize_negint_value, "int"),
        FLOAT_VALUE_SIGNATURE: (True, deserialize_float_value, "float"),
        DECIMAL_VALUE_SIGNATURE: (True, deserialize_decimal_value, "decimal"),
        TIMESTAMP_VALUE_SIGNATURE: (True, deserialize_timestamp_value, "timestamp"),
        SYMBOL_VALUE_SIGNATURE: (True, deserialize_symbol_value, "symbol"),
        STRING_VALUE_SIGNATURE: (True, deserialize_string_value, "string"),
        CLOB_VALUE_SIGNATURE: (True, deserialize_clob_value, "clob"),
        BLOB_VALUE_SIGNATURE: (True, deserialize_blob_value, "blob"),
        LIST_VALUE_SIGNATURE: (True, deserialize_list_value, "list"),
        SEXP_VALUE_SIGNATURE: (True, deserialize_sexp_value, "sexp"),
        STRUCT_VALUE_SIGNATURE: (False, deserialize_struct_value, "struct"),
        ANNOTATION_VALUE_SIGNATURE: (True, deserialize_annotation_value, "annotation"),
        RESERVED_VALUE_SIGNATURE: (True, deserialize_reserved_value, "reserved"),
    }

    ION_TYPE_HANDLERS = {
        IonAnnotation: serialize_annotation_value,
        IonBLOB: serialize_blob_value,
        IonBool: serialize_bool_value,
        IonCLOB: serialize_clob_value,
        IonDecimal: serialize_decimal_value,
        IonFloat: serialize_float_value,
        IonInt: serialize_int_value,
        IonList: serialize_list_value,
        IonNull: serialize_null_value,
        IonSExp: serialize_sexp_value,
        IonString: serialize_string_value,
        IonStruct: serialize_struct_value,
        IonSymbol: serialize_symbol_value,
        IonTimestamp: serialize_timestamp_value,
    }


def descriptor(signature, flag):
    if flag < 0 or flag > 0x0F:
        raise Exception("Serialize bad descriptor flag: %d" % flag)

    return bytes_([(signature << 4) + flag])


def serialize_unsignedint(value):
    return ltrim0(struct.pack(">Q", value))


def deserialize_unsignedint(data):
    if len(data) > 0 and bytes_indexed(data, 0) == 0:
        raise Exception("BinaryIonInt data padded with 0x00")

    return struct.unpack_from(">Q", lpad0(data, 8))[0]


def serialize_signedint(value):
    data = ltrim0x(struct.pack(">Q", abs(value)))

    if value < 0:
        data = or_first_byte(data, 0x80)

    return data


def deserialize_signedint(data):
    if len(data) == 0:
        return 0

    if (bytes_indexed(data, 0) & 0x80) != 0:
        return -(struct.unpack_from(">Q", lpad0(and_first_byte(data, 0x7F), 8))[0])

    return struct.unpack_from(">Q", lpad0(data, 8))[0]


def serialize_vluint(value):
    if value < 0:
        raise Exception("Cannot serialize negative value as IonVLUInt: %d" % value)

    datalst = [(value & 0x7F) + 0x80]
    while True:
        value = value >> 7
        if value == 0:
            return bytes_(datalst)

        datalst.insert(0, value & 0x7F)


def deserialize_vluint(serial):
    value = 0
    while True:
        i = serial.unpack("B")
        value = (value << 7) | (i & 0x7F)

        if i & 0x80:
            return value

        if value == 0:
            raise Exception("IonVLUInt padded with 0x00")

        if value > 0x7FFFFFFFFFFFFF:
            raise Exception("IonVLUInt data value is too large, missing terminator")


def serialize_vlsint(value):
    if value is None:
        return b"\xc0"

    data = serialize_vluint(abs(value))

    if bytes_indexed(data, 0) & 0x40:
        data = b"\x00" + data

    if value < 0:
        data = or_first_byte(data, 0x40)

    return data


def deserialize_vlsint(serial, allow_minus_zero=False):
    first = serial.unpack("B")
    ibyte = first & 0xBF

    datalst = []
    if ibyte != 0:
        datalst.append(ibyte)

    while (ibyte & 0x80) == 0:
        ibyte = serial.unpack("B")
        datalst.append(ibyte)

    value = deserialize_vluint(Deserializer(bytes_(datalst)))

    if first & 0x40:
        if value:
            value = -value
        elif allow_minus_zero:
            value = None
        else:
            raise Exception("deserialize_vlsint unexpected -0 value")

    return value


def lpad0(data, size):
    if len(data) > size:
        extra = len(data) - size
        if data[:size] != b"\x00" * extra:
            raise Exception(
                "lpad0, length (%d) > max (%d): %s"
                % (len(data), size, bytes_to_separated_hex(data))
            )

        return data[:size]

    return b"\x00" * (size - len(data)) + data


def ltrim0(data):
    while len(data) and bytes_indexed(data, 0) == 0:
        data = data[1:]

    return data


def ltrim0x(data):
    while len(data) and bytes_indexed(data, 0) == 0:
        if len(data) > 1 and (bytes_indexed(data, 1) & 0x80):
            break

        data = data[1:]

    return data


def combine_decimal_digits(digits, sign_negative):
    val = 0

    for digit in digits:
        val = (val * 10) + digit

    if sign_negative:
        val = -val

    return val


def and_first_byte(data, mask):
    return bytes_([bytes_indexed(data, 0) & mask]) + data[1:]


def or_first_byte(data, mask):
    return bytes_([bytes_indexed(data, 0) | mask]) + data[1:]
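# Sketch of the variable-length integer encoding used throughout this module:
# each byte carries 7 payload bits and the final byte has its high bit set.
encoded = serialize_vluint(624485)                   # -> b"\x26\x0e\xe5"
decoded = deserialize_vluint(Deserializer(encoded))
assert decoded == 624485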
460  kindle_download_helper/third_party/kfxlib/ion_symbol_table.py  (vendored, new file)
@@ -0,0 +1,460 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import re

from .ion import (
    IS,
    IonAnnotation,
    IonStruct,
    IonSymbol,
    ion_type,
    isstring,
    unannotated,
)
from .message_logging import log
from .python_transition import IS_PYTHON2
from .utilities import list_symbols, quote_name, type_name
from .yj_symbol_catalog import SYSTEM_SYMBOL_TABLE, YJ_SYMBOLS, IonSharedSymbolTable

if IS_PYTHON2:
    from .python_transition import repr


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"

DEBUG = False
REPORT_ALL_USED_SYMBOLS = False


class SymbolTableCatalog(object):
    def __init__(self, add_global_shared_symbol_tables=False):
        self.shared_symbol_tables = {}
        self.clear()

        if add_global_shared_symbol_tables:
            self.add_global_shared_symbol_tables()

    def clear(self):
        self.shared_symbol_tables.clear()
        self.add_shared_symbol_table(SYSTEM_SYMBOL_TABLE)

    def add_global_shared_symbol_tables(self):
        self.add_shared_symbol_table(YJ_SYMBOLS)

    def add_shared_symbol_table(self, shared_symbol_table):
        self.shared_symbol_tables[
            (shared_symbol_table.name, shared_symbol_table.version)
        ] = shared_symbol_table

        if (
            shared_symbol_table.name not in self.shared_symbol_tables
            or shared_symbol_table.version
            >= self.shared_symbol_tables[(shared_symbol_table.name, None)].version
        ):
            self.shared_symbol_tables[
                (shared_symbol_table.name, None)
            ] = shared_symbol_table

    def create_shared_symbol_table(self, symbol_table_data):
        self.add_shared_symbol_table(
            IonSharedSymbolTable(
                symbol_table_data["name"],
                symbol_table_data["version"] if "version" in symbol_table_data else 1,
                symbol_table_data["symbols"] if "symbols" in symbol_table_data else [],
            )
        )

    def get_shared_symbol_table(self, name, version=None):
        return self.shared_symbol_tables.get(
            (name, version)
        ) or self.shared_symbol_tables.get((name, None))


global_catalog = SymbolTableCatalog(add_global_shared_symbol_tables=True)


class SymbolTableImport(object):
    def __init__(self, name, version, max_id):
        self.name = name
        self.version = version
        self.max_id = max_id


class LocalSymbolTable(object):
    def __init__(
        self,
        initial_import=None,
        context="",
        ignore_undef=False,
        catalog=global_catalog,
    ):
        self.context = context
        self.ignore_undef = ignore_undef
        self.catalog = catalog

        self.undefined_ids = set()
        self.undefined_symbols = set()
        self.unexpected_used_symbols = set()
        self.reported = False
        self.clear()
        self.set_translation(None)

        if initial_import:
            self.import_shared_symbol_table(initial_import)

    def clear(self):
        self.table_imports = []
        self.symbols = []
        self.id_of_symbol = {}
        self.symbol_of_id = {}
        self.unexpected_ids = set()
        self.creating_local_symbols = False
        self.creating_yj_local_symbols = False

        self.import_symbols(self.catalog.get_shared_symbol_table("$ion").symbols)
        self.local_min_id = len(self.symbols) + 1

    def create(self, symbol_table_data, yj_local_symbols=False):
        if "imports" in symbol_table_data:
            imports = symbol_table_data["imports"]
            if ion_type(imports) is IonSymbol:
                if imports != "$ion_symbol_table":
                    raise Exception("Unexpected imports value: %s" % imports)
            else:
                self.clear()

                for sym_import in imports:
                    self.import_shared_symbol_table(
                        sym_import["name"],
                        sym_import.get("version") or 1,
                        sym_import.get("max_id"),
                    )
        else:
            self.clear()

        symbol_list = (
            unannotated(symbol_table_data["symbols"])
            if "symbols" in symbol_table_data
            else []
        )

        self.creating_local_symbols = True
        self.import_symbols(symbol_list)

        if "max_id" in symbol_table_data:
            expected_max_id = symbol_table_data["max_id"]
            if expected_max_id is not None and expected_max_id != len(self.symbols):
                log.error(
                    "Symbol table max_id after import expected %d, found %d"
                    % (expected_max_id, len(self.symbols))
                )

    def import_shared_symbol_table(self, name, version=None, max_id=None):
        if DEBUG:
            log.debug(
                "Importing ion symbol table %s version %s max_id %s"
                % (quote_name(name), version, max_id)
            )

        if self.creating_local_symbols:
            raise Exception(
                "Importing shared symbols after local symbols have been created"
            )

        if name == "$ion":
            return

        symbol_table = self.catalog.get_shared_symbol_table(name, version)

        if symbol_table is None:
            log.error("Imported shared symbol table %s is unknown" % name)
            symbol_table = IonSharedSymbolTable(name=name, version=version)

        if version is None:
            version = symbol_table.version
        elif symbol_table.version != version:
            if max_id is None:
                log.error(
                    "Import version %d of shared symbol table %s without max_id, but have version %d"
                    % (version, name, symbol_table.version)
                )
            else:
                log.warning(
                    "Import version %d of shared symbol table %s, but have version %d"
                    % (version, name, symbol_table.version)
                )

        table_len = len(symbol_table.symbols)

        if max_id is None:
            max_id = table_len

        if max_id < 0:
            raise Exception(
                "Import symbol table %s version %d max_id %d is invalid"
                % (name, version, max_id)
            )

        self.table_imports.append(SymbolTableImport(name, version, max_id))

        if max_id < table_len:
            symbol_list = symbol_table.symbols[:max_id]
        elif max_id > table_len:
            if table_len > 0:
                prior_len = len(self.symbols)
                log.warning(
                    "Import symbol table %s version %d max_id %d(+%d=%d) exceeds known table size %d(+%d=%d)"
                    % (
                        name,
                        version,
                        max_id,
                        prior_len,
                        max_id + prior_len,
                        table_len,
                        prior_len,
                        table_len + prior_len,
                    )
                )

            symbol_list = symbol_table.symbols + ([None] * (max_id - table_len))
        else:
            symbol_list = symbol_table.symbols

        self.import_symbols(symbol_list)
        self.local_min_id = len(self.symbols) + 1

    def import_symbols(self, symbols):
        for symbol in symbols:
            symbol = unannotated(symbol)

            if symbol is not None:
                if not isstring(symbol):
                    log.error(
                        "imported symbol %s is type %s, treating as null"
                        % (symbol, type_name(symbol))
                    )
                    symbol = None

            self.add_symbol(symbol)

    def create_local_symbol(self, symbol):
        self.creating_local_symbols = True

        if symbol not in self.id_of_symbol:
            self.add_symbol(symbol)

        return IonSymbol(symbol)

    def add_symbol(self, symbol):
        if symbol is None:
            self.symbols.append(None)
            return -1

        if not isstring(symbol):
            raise Exception(
                "symbol %s is type %s, not string" % (symbol, type_name(symbol))
            )

        if len(symbol) == 0:
            raise Exception("symbol has zero length")

        expected = True

        if not self.creating_local_symbols:
            if symbol.endswith("?"):
                symbol = symbol[:-1]
                expected = False
            elif REPORT_ALL_USED_SYMBOLS:
                expected = False

        self.symbols.append(symbol)

        if symbol not in self.id_of_symbol:
            symbol_id = len(self.symbols)
            self.id_of_symbol[symbol] = symbol_id
            self.symbol_of_id[symbol_id] = symbol
        else:
            self.symbol_of_id[len(self.symbols)] = symbol
            symbol_id = self.id_of_symbol[symbol]
            log.error("Symbol %s already exists with id %d" % (symbol, symbol_id))

        if not expected:
            self.unexpected_ids.add(symbol_id)

        return symbol_id

    def get_symbol(self, symbol_id):
        if not isinstance(symbol_id, int):
            raise Exception(
                "get_symbol: symbol id must be integer not %s: %s"
                % (type_name(symbol_id), repr(symbol_id))
            )

        symbol = self.symbol_of_id.get(symbol_id)

        if symbol is None:
            symbol = "$%d" % symbol_id
            self.undefined_ids.add(symbol_id)

        if symbol_id in self.unexpected_ids:
            self.unexpected_used_symbols.add(symbol)

        return IonSymbol(symbol)

    def get_id(self, ion_symbol, used=True):
        if not isinstance(ion_symbol, IonSymbol):
            raise Exception(
                "get_id: symbol must be IonSymbol not %s: %s"
                % (type_name(ion_symbol), repr(ion_symbol))
            )

        symbol = ion_symbol.tostring()

        if symbol.startswith("$") and re.match(r"^\$[0-9]+$", symbol):
            symbol_id = int(symbol[1:])

            if symbol_id not in self.symbol_of_id:
                self.undefined_ids.add(symbol_id)
        else:
            symbol_id = self.id_of_symbol.get(symbol)

            if symbol_id is None:
                if used:
                    self.undefined_symbols.add(symbol)

                symbol_id = 0

        if used and symbol_id in self.unexpected_ids:
            self.unexpected_used_symbols.add(symbol)

        return symbol_id

    def is_shared_symbol(self, ion_symbol):
        symbol_id = self.get_id(ion_symbol, used=False)
        return symbol_id > 0 and symbol_id < self.local_min_id

    def is_local_symbol(self, ion_symbol):
        return self.get_id(ion_symbol, used=False) >= self.local_min_id

    def replace_local_symbols(self, new_symbols):
        self.discard_local_symbols()
        self.import_symbols(new_symbols)

    def get_local_symbols(self):
        return self.symbols[self.local_min_id - 1 :]

    def discard_local_symbols(self):
        symbol_id = self.local_min_id
        for symbol in self.symbols[self.local_min_id - 1 :]:
            self.id_of_symbol.pop(symbol)
            self.symbol_of_id.pop(symbol_id)
            symbol_id += 1

        self.symbols = self.symbols[: self.local_min_id - 1]

    def create_import(self, imports_only=False):
        if not self.symbols:
            return None

        symbol_table_data = IonStruct()

        if not imports_only:
            symbol_table_data[IS("max_id")] = len(self.symbols)

        symbol_table_data[IS("imports")] = [
            IonStruct(
                IS("name"),
                table_import.name,
                IS("version"),
                table_import.version,
                IS("max_id"),
                table_import.max_id,
            )
            for table_import in self.table_imports
        ]

        if not imports_only:
            symbol_table_data[IS("symbols")] = self.symbols[self.local_min_id - 1 :]

        return IonAnnotation([IS("$ion_symbol_table")], symbol_table_data)

    def set_translation(self, alt_symbol_table):
        self.import_translate = {}
        self.export_translate = {}

        if alt_symbol_table is None:
            return

        offset = len(self.catalog.get_shared_symbol_table("$ion").symbols) + 1

        for table_import in self.table_imports:
            if table_import.name == alt_symbol_table.name:
                orig_symbol_table = self.catalog.get_shared_symbol_table(
                    table_import.name, table_import.version
                )
                for idx in range(
                    max(len(orig_symbol_table.symbols), len(alt_symbol_table.symbols))
                ):
                    have_orig = idx < len(orig_symbol_table.symbols)
                    have_alt = idx < len(alt_symbol_table.symbols)

                    orig_symbol = (
                        orig_symbol_table.symbols[idx]
                        if have_orig
                        else "$%d" % (idx + offset)
                    )
                    if orig_symbol.endswith("?"):
                        orig_symbol = orig_symbol[:-1]

                    alt_symbol = (
                        alt_symbol_table.symbols[idx]
                        if have_alt
                        else "$%d" % (idx + offset)
                    )

                    if have_alt:
                        self.import_translate[alt_symbol] = orig_symbol

                    if have_orig:
                        self.export_translate[orig_symbol] = alt_symbol

                break

            offset += table_import.max_id

    def __repr__(self):
        return "symbols: %s; id_of_symbol %s; symbol_of_id %s" % (
            repr(self.symbols),
            repr(self.id_of_symbol),
            repr(self.symbol_of_id),
        )

    def report(self):
        if self.reported:
            return

        context = ("%s: " % self.context) if self.context else ""

        if self.unexpected_used_symbols:
            log.error(
                "%sUnexpected Ion symbols used: %s"
                % (context, list_symbols(self.unexpected_used_symbols))
            )

        if self.undefined_symbols and not self.ignore_undef:
            log.error(
                "%sUndefined Ion symbols found: %s"
                % (
                    context,
                    ", ".join([quote_name(s) for s in sorted(self.undefined_symbols)]),
                )
            )

        if self.undefined_ids:
            log.error(
                "%sUndefined Ion symbol IDs found: %s"
                % (context, list_symbols(self.undefined_ids))
            )

        self.reported = True
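# Sketch of the LocalSymbolTable round trip, using only the classes above:
symtab = LocalSymbolTable(context="demo")
sym = symtab.create_local_symbol("my_symbol")  # returns an IonSymbol
sid = symtab.get_id(sym)                       # id assigned past the imported tables
assert symtab.get_symbol(sid) == sym
symtab.report()                                # logs any undefined/unexpected symbols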
1281  kindle_download_helper/third_party/kfxlib/ion_text.py  (vendored, new file)
File diff suppressed because it is too large
164
kindle_download_helper/third_party/kfxlib/jxr_container.py
vendored
Normal file
@@ -0,0 +1,164 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import sys
import uuid

from .jxr_image import JXRImage
from .jxr_misc import Deserializer, bytes_to_separated_hex
from .message_logging import log

if sys.version_info[0] == 2:
    str = type("")


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


FIELD_TYPE_LEN = {
    1: 1,
    2: 1,
    3: 2,
    4: 4,
    5: 8,
    6: 1,
    7: 1,
    8: 2,
    9: 4,
    10: 8,
    11: 4,
    12: 8,
}

LEN_FMT = {
    1: "B",
    2: "s",
    3: "<H",
    4: "<L",
    6: "b",
    7: "s",
    8: "<h",
    9: "<l",
    11: "<f",
    12: "<d",
}

SUPPORTED_PIXEL_FORMATS = {
    "24c3dd6f-034e-fe4b-b185-3d77768dc905": "BlackWhite",
    "24c3dd6f-034e-fe4b-b185-3d77768dc908": "8bppGray",
    "24c3dd6f-034e-fe4b-b185-3d77768dc90b": "16bppGray",
    "24c3dd6f-034e-fe4b-b185-3d77768dc90c": "24bppBGR",
    "24c3dd6f-034e-fe4b-b185-3d77768dc90d": "24bppRGB",
    "24c3dd6f-034e-fe4b-b185-3d77768dc90f": "32bppRGBA",
    "24c3dd6f-034e-fe4b-b185-3d77768dc920": "24bpp3Channels",
    "24c3dd6f-034e-fe4b-b185-3d77768dc921": "32bpp4Channels",
}


class JXRContainer(object):
    def __init__(self, data):
        header = Deserializer(data)

        tif_signature = header.extract(4)
        if tif_signature != b"\x49\x49\xbc\x01":
            raise Exception(
                "TIF signature is incorrect: %s" % bytes_to_separated_hex(tif_signature)
            )

        ifd_offset = header.unpack("<L", "ifd_offset")
        header.extract(ifd_offset - header.offset)

        pixel_format = ""
        self.image_width = (
            self.image_height
        ) = image_offset = image_byte_count = self.image_data = None

        num_entries = header.unpack("<H", "num_entries")

        def field_value():
            return Deserializer(field_data).unpack(LEN_FMT[field_type], "field_value")

        for i in range(num_entries):
            field_tag = header.unpack("<H", "field_tag")
            field_type = header.unpack("<H", "field_type")
            field_count = header.unpack("<L", "field_count")

            field_data_len = FIELD_TYPE_LEN[field_type] * field_count
            if field_data_len <= 4:
                field_data = header.extract(field_data_len)
                header.extract(4 - field_data_len)
            else:
                field_data_or_offset = header.unpack("<L", "field_data_or_offset")
                field_data = data[
                    field_data_or_offset : field_data_or_offset + field_data_len
                ]

            if field_tag == 0xBC01:
                pixel_format = str(uuid.UUID(bytes=field_data))
            elif field_tag == 0xBC80:
                self.image_width = field_value()
            elif field_tag == 0xBC81:
                self.image_height = field_value()
            elif field_tag == 0xBCC0:
                image_offset = field_value()
            elif field_tag == 0xBCC1:
                image_byte_count = field_value()

        if not (
            pixel_format
            and self.image_width
            and self.image_height
            and image_offset
            and (image_byte_count is not None)
        ):
            raise Exception(
                "Missing required TIFF field tag: pixel_format=%s width=%s height=%s offset=%s byte-count=%s"
                % (
                    pixel_format,
                    self.image_width,
                    self.image_height,
                    image_offset,
                    image_byte_count,
                )
            )

        if pixel_format not in SUPPORTED_PIXEL_FORMATS:
            log.warning("Unsupported pixel format: %s" % pixel_format)

        ifd_offset = header.unpack("<L", "ifd_offset")
        if ifd_offset != 0:
            raise Exception(
                "File contains multiple images - only a single image is supported"
            )

        if image_byte_count > 0:
            self.image_data = data[image_offset : image_offset + image_byte_count]

            if len(self.image_data) < image_byte_count:
                log.warning(
                    "File is truncated (missing %d bytes of image data)"
                    % (image_byte_count - len(self.image_data))
                )
        else:
            self.image_data = data[image_offset:]

    def unpack_image(self):
        jxr_image = JXRImage(self.image_data)

        im = jxr_image.decode()

        if (
            jxr_image.image_width != self.image_width
            or jxr_image.image_height != self.image_height
        ):
            log.warning(
                "Expected image size %dx%d but found %dx%d"
                % (
                    self.image_width,
                    self.image_height,
                    jxr_image.image_width,
                    jxr_image.image_height,
                )
            )

        return im
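A brief usage sketch for the class above, assuming the input bytes are a complete JPEG XR file such as the ones stored inside KFX resources (the file name here is hypothetical):

# Hypothetical usage of JXRContainer: parse the TIFF-style wrapper, then decode
# the payload through JXRImage via unpack_image().
with open("page0001.jxr", "rb") as f:
    data = f.read()

container = JXRContainer(data)
print("%dx%d" % (container.image_width, container.image_height))

im = container.unpack_image()  # returns the decoded image object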
3264
kindle_download_helper/third_party/kfxlib/jxr_image.py
vendored
Normal file
File diff suppressed because it is too large
122
kindle_download_helper/third_party/kfxlib/jxr_misc.py
vendored
Normal file
@@ -0,0 +1,122 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import struct

from .message_logging import log

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


DEBUG = False


class Deserializer(object):
    def __init__(self, data):
        self.buffer = data
        self.offset = 0
        self.bits_remaining = self.remainder = 0

    def extract(self, size=None, upto=None, advance=True, check_remaining=True):
        if check_remaining and self.bits_remaining:
            raise Exception(
                "Deserializer: unexpected %d bit remaining" % self.bits_remaining
            )

        if size is None:
            size = len(self) if upto is None else (upto - self.offset)

        data = self.buffer[self.offset : self.offset + size]

        if len(data) < size or size < 0:
            raise Exception(
                "Deserializer: Insufficient data (need %d bytes, have %d bytes)"
                % (size, len(data))
            )

        if advance:
            self.offset += size

        return data

    def unpack(self, fmt, name="", advance=True):
        if self.bits_remaining:
            raise Exception(
                "Deserializer: unexpected %d bit remaining" % self.bits_remaining
            )

        result = struct.unpack_from(fmt, self.buffer, self.offset)[0]

        if DEBUG:
            log.info("%d: unpack(%s)=%s %s" % (self.offset, fmt, repr(result), name))

        if advance:
            self.offset += struct.calcsize(fmt)

        return result

    def unpack_bits(self, size, name=""):
        while self.bits_remaining < size:
            self.remainder = (self.remainder << 8) + ord(
                self.extract(1, check_remaining=False)
            )
            self.bits_remaining += 8

        self.bits_remaining -= size
        value = self.remainder >> self.bits_remaining

        if value > (1 << size) - 1:
            raise Exception()

        self.remainder = self.remainder & (0xFF >> (8 - self.bits_remaining))

        if DEBUG:
            log.info(
                "%d: unpack_bits(%d)=%u (%s) %s"
                % (self.offset, size, value, ("{0:0%sb}" % size).format(value), name)
            )

        return value

    def unpack_flag(self, name=""):
        return self.unpack_bits(1, name) == 1

    def push_bit(self, value):
        # restore the bit above the bits still buffered in remainder
        self.remainder |= (value & 1) << self.bits_remaining
        self.bits_remaining += 1

    def check_bit_field(self, size, name, expected_values, name_table={}):
        def value_name(v):
            return name_table.get(v, "%d" % v)

        value = self.unpack_bits(size, name)

        if value not in expected_values:
            msg = "%s value %s is unsupported (only %s allowed)" % (
                name,
                value_name(value),
                ", ".join([value_name(ev) for ev in expected_values]),
            )
            raise Exception(msg)

        return value

    def huff(self, table, name):
        k = 1
        while k <= 0xFF:
            k = (k << 1) + self.unpack_bits(1, name)
            v = table.get(k)
            if v is not None:
                return v

        raise Exception("decode using huffman table failed")

    def discard_remainder_bits(self):
        self.bits_remaining = self.remainder = 0

    def __len__(self):
        return len(self.buffer) - self.offset


def bytes_to_separated_hex(data, sep=" "):
    return sep.join("%02x" % ord(data[i : i + 1]) for i in range(len(data)))
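An illustrative round trip with the Deserializer above (the byte strings are made up): byte-level reads consume from offset, while bit-level reads buffer whole bytes and hand back MSB-first slices.

d = Deserializer(b"\x49\x49\xbc\x01\x08\x00\x00\x00")
assert d.extract(4) == b"\x49\x49\xbc\x01"  # signature bytes
assert d.unpack("<L", "ifd_offset") == 8    # little-endian uint32
assert len(d) == 0                          # all bytes consumed

bits = Deserializer(b"\xb4")                # 0b1011_0100
assert bits.unpack_bits(3) == 0b101         # top three bits
assert bits.unpack_flag() is True           # next single bit is 1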
598
kindle_download_helper/third_party/kfxlib/kfx_container.py
vendored
Normal file
@@ -0,0 +1,598 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import copy

from .ion import IS, IonAnnotation, IonBLOB, IonStruct
from .ion_binary import IonBinary
from .message_logging import log
from .python_transition import IS_PYTHON2, bytes_to_hex
from .utilities import (
    Deserializer,
    Serializer,
    bytes_to_separated_hex,
    json_deserialize,
    json_serialize_compact,
    sha1,
    type_name,
)
from .yj_container import (
    CONTAINER_FORMAT_KFX_ATTACHABLE,
    CONTAINER_FORMAT_KFX_MAIN,
    CONTAINER_FORMAT_KFX_METADATA,
    CONTAINER_FRAGMENT_TYPES,
    DRMION_SIGNATURE,
    RAW_FRAGMENT_TYPES,
    YJContainer,
    YJFragment,
)
from .yj_symbol_catalog import SYSTEM_SYMBOL_TABLE

if IS_PYTHON2:
    from .python_transition import repr


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


DEBUG = False

MAX_KFX_CONTAINER_SIZE = 16 * 1024 * 1024

DEFAULT_COMPRESSION_TYPE = 0
DEFAULT_DRM_SCHEME = 0


KFX_MAIN_CONTAINER_FRAGMENT_IDNUMS = {
    259,
    260,
    538,
}

KFX_METADATA_CONTAINER_FRAGMENT_IDNUMS = {
    258,
    419,
    490,
    585,
}

KFX_ATTACHABLE_CONTAINER_FRAGMENT_IDNUMS = {
    417,
}


class KfxContainer(YJContainer):
    SIGNATURE = b"CONT"
    DRM_SIGNATURE = DRMION_SIGNATURE
    VERSION = 2
    ALLOWED_VERSIONS = {1, 2}
    MIN_LENGTH = 18
    DEFAULT_CHUNK_SIZE = 4096

    def __init__(self, symtab, datafile=None, fragments=None):
        YJContainer.__init__(self, symtab, datafile=datafile, fragments=fragments)

    def deserialize(self, ignore_drm=False):
        self.doc_symbols = None
        self.format_capabilities = None
        self.container_info = None
        self.entities = []
        self.fragments.clear()

        data = self.datafile.get_data()

        if len(data) < KfxContainer.MIN_LENGTH:
            raise Exception("Container is too short (%d bytes)" % len(data))

        header = Deserializer(data)
        signature = header.unpack("4s")
        version = header.unpack("<H")
        header_len = header.unpack("<L")

        if signature != KfxContainer.SIGNATURE:
            pdb_creator = data[64:68]
            if pdb_creator in [b"MOBI", b"CONT"]:
                raise Exception(
                    "Found a PDB %s container. This book is not in KFX format."
                    % pdb_creator.decode("utf8")
                )

            raise Exception(
                "Container signature is incorrect (%s)"
                % bytes_to_separated_hex(signature)
            )

        if version not in KfxContainer.ALLOWED_VERSIONS:
            log.error("Container version is incorrect (%d)" % version)

        if header_len < KfxContainer.MIN_LENGTH:
            raise Exception("Container header is too short (%d)" % header_len)

        container_info_offset = header.unpack(b"<L")
        container_info_length = header.unpack(b"<L")

        container_info_data = data[
            container_info_offset : container_info_offset + container_info_length
        ]
        container_info = IonBinary(self.symtab).deserialize_single_value(
            container_info_data
        )
        if DEBUG:
            log.debug("container info:\n%s" % repr(container_info))

        container_id = container_info.pop("$409", "")

        compression_type = container_info.pop("$410", DEFAULT_COMPRESSION_TYPE)
        if compression_type != DEFAULT_COMPRESSION_TYPE:
            log.error(
                "Unexpected bcComprType in container %s info: %s"
                % (container_id, repr(compression_type))
            )

        drm_scheme = container_info.pop("$411", DEFAULT_DRM_SCHEME)
        if drm_scheme != DEFAULT_DRM_SCHEME:
            log.error(
                "Unexpected bcDRMScheme in container %s info: %s"
                % (container_id, repr(drm_scheme))
            )

        doc_symbol_offset = container_info.pop("$415", None)
        doc_symbol_length = container_info.pop("$416", 0)
        if doc_symbol_length:
            doc_symbol_data = data[
                doc_symbol_offset : doc_symbol_offset + doc_symbol_length
            ]
            self.doc_symbols = IonBinary(self.symtab).deserialize_annotated_value(
                doc_symbol_data, expect_annotation="$ion_symbol_table"
            )
            if DEBUG:
                log.debug("Document symbols:\n%s" % repr(self.doc_symbols))

            for sym_import in self.doc_symbols.value["imports"]:
                if "max_id" in sym_import:
                    sym_import["max_id"] -= len(SYSTEM_SYMBOL_TABLE.symbols)

            self.symtab.create(self.doc_symbols.value)

        chunk_size = container_info.pop("$412", KfxContainer.DEFAULT_CHUNK_SIZE)
        if chunk_size != KfxContainer.DEFAULT_CHUNK_SIZE:
            log.warning(
                "Unexpected bcChunkSize in container %s info: %d"
                % (container_id, chunk_size)
            )

        if version > 1:
            format_capabilities_offset = container_info.pop("$594", None)
            format_capabilities_length = container_info.pop("$595", 0)
            if format_capabilities_length:
                format_capabilities_data = data[
                    format_capabilities_offset : format_capabilities_offset
                    + format_capabilities_length
                ]
                self.format_capabilities = IonBinary(
                    self.symtab
                ).deserialize_annotated_value(
                    format_capabilities_data, expect_annotation="$593"
                )
                if DEBUG:
                    log.debug(
                        "Format capabilities:\n%s" % repr(self.format_capabilities)
                    )

        type_idnums = set()
        index_table_offset = container_info.pop("$413", None)
        index_table_length = container_info.pop("$414", 0)

        if len(container_info):
            log.error("container_info has extra data: %s" % repr(container_info))

        payload_sha1 = bytes_to_hex(sha1(data[header_len:]))

        kfxgen_package_version = ""
        kfxgen_application_version = ""

        kfxgen_info_data = (
            data[container_info_offset + container_info_length : header_len]
            .replace(b"\x1b", b"")
            .decode("ascii", errors="ignore")
        )
        kfxgen_info_json = (
            kfxgen_info_data.replace("key :", '"key":')
            .replace("key:", '"key":')
            .replace("value:", '"value":')
        )

        try:
            kfxgen_info = json_deserialize(kfxgen_info_json)
        except Exception:
            log.info("Exception decoding json: %s" % kfxgen_info_json)
            raise

        for info in kfxgen_info:
            key = info.pop("key")
            value = info.pop("value")

            if key in {"appVersion", "kfxgen_application_version"}:
                kfxgen_application_version = value

            elif key in {"buildVersion", "kfxgen_package_version"}:
                kfxgen_package_version = value

            elif key == "kfxgen_payload_sha1":
                if value != payload_sha1:
                    log.error(
                        "Incorrect kfxgen_payload_sha1 in container %s" % container_id
                    )
                    log.info("value=%s sha1=%s" % (value, payload_sha1))

            elif key == "kfxgen_acr":
                if value != container_id:
                    log.error(
                        "Unexpected kfxgen_acr in container %s: %s"
                        % (container_id, value)
                    )

            else:
                log.error("kfxgen_info has unknown key: %s = %s" % (key, value))

            if len(info):
                log.error("kfxgen_info has extra data: %s" % repr(info))

        if index_table_length:
            entity_table = Deserializer(
                data[index_table_offset : index_table_offset + index_table_length]
            )

            while len(entity_table):
                id_idnum = entity_table.unpack("<L")
                type_idnum = entity_table.unpack("<L")
                entity_offset = entity_table.unpack("<Q")
                entity_len = entity_table.unpack("<Q")

                type_idnums.add(type_idnum)

                entity_start = header_len + entity_offset
                if DEBUG:
                    log.debug(
                        "Container entity: id=%d type=%d len=%d"
                        % (id_idnum, type_idnum, entity_len)
                    )

                if entity_start + entity_len > len(data):
                    raise Exception(
                        "Container %s (%d bytes) is not large enough for entity end (offset %d)"
                        % (container_id, len(data), entity_start + entity_len)
                    )

                self.entities.append(
                    KfxContainerEntity(
                        self.symtab,
                        id_idnum,
                        type_idnum,
                        serialized_data=data[entity_start : entity_start + entity_len],
                    )
                )

        if type_idnums & KFX_MAIN_CONTAINER_FRAGMENT_IDNUMS:
            container_format = CONTAINER_FORMAT_KFX_MAIN
        elif (type_idnums & KFX_METADATA_CONTAINER_FRAGMENT_IDNUMS) or (
            doc_symbol_length > 0
        ):
            container_format = CONTAINER_FORMAT_KFX_METADATA
        elif type_idnums & KFX_ATTACHABLE_CONTAINER_FRAGMENT_IDNUMS:
            container_format = CONTAINER_FORMAT_KFX_ATTACHABLE
        else:
            log.error("Cannot determine KFX container type of %s" % container_id)
            container_format = "KFX unknown"

        self.container_info = IonAnnotation(
            [IS("$270")],
            IonStruct(
                IS("$409"),
                container_id,
                IS("$412"),
                chunk_size,
                IS("$410"),
                compression_type,
                IS("$411"),
                drm_scheme,
                IS("$587"),
                kfxgen_application_version,
                IS("$588"),
                kfxgen_package_version,
                IS("$161"),
                container_format,
                IS("version"),
                version,
                IS("$181"),
                [[e.type_idnum, e.id_idnum] for e in self.entities],
            ),
        )

        self.container_id = container_id

    def get_fragments(self):
        if not self.fragments:
            for data in [
                self.doc_symbols,
                self.container_info,
                self.format_capabilities,
            ]:
                if data is not None:
                    self.fragments.append(YJFragment(data))

            for entity in self.entities:
                self.fragments.append(entity.deserialize())

        return self.fragments

    def serialize(self):
        container_id = None
        kfxgen_package_version = ""
        kfxgen_application_version = ""
        doc_symbols = None
        format_capabilities = None

        container_cnt = (
            format_capabilities_cnt
        ) = ion_symbol_table_cnt = container_entity_map_cnt = 0

        for fragment in self.get_fragments():
            if fragment.ftype == "$270":
                container_cnt += 1
                container_id = fragment.value.get("$409", "")
                kfxgen_application_version = fragment.value.get("$587", "")
                kfxgen_package_version = fragment.value.get("$588", "")

            elif fragment.ftype == "$593":
                format_capabilities_cnt += 1
                format_capabilities = fragment

            elif fragment.ftype == "$ion_symbol_table":
                ion_symbol_table_cnt += 1
                doc_symbols = fragment

                doc_symbols = YJFragment(
                    doc_symbols.annotations, value=copy.deepcopy(doc_symbols.value)
                )
                for sym_import in doc_symbols.value["imports"]:
                    if "max_id" in sym_import:
                        sym_import["max_id"] += len(SYSTEM_SYMBOL_TABLE.symbols)

            elif fragment.ftype == "$419":
                container_entity_map_cnt += 1

        if (
            container_cnt != 1
            or format_capabilities_cnt > 1
            or ion_symbol_table_cnt != 1
            or container_entity_map_cnt != 1
        ):
            log.error(
                "Missing/extra fragments required to build KFX container: "
                "container=%d format_capabilities=%d ion_symbol_table=%d container_entity_map=%d"
                % (
                    container_cnt,
                    format_capabilities_cnt,
                    ion_symbol_table_cnt,
                    container_entity_map_cnt,
                )
            )

        entities = []
        for fragment in self.fragments:
            if (fragment.ftype not in CONTAINER_FRAGMENT_TYPES) or (
                fragment.ftype == "$419"
            ):
                entities.append(
                    KfxContainerEntity(
                        self.symtab,
                        id_idnum=self.symtab.get_id(
                            IS("$348") if fragment.is_single() else fragment.fid
                        ),
                        type_idnum=self.symtab.get_id(fragment.ftype),
                        value=fragment.value,
                    )
                )

        container = Serializer()
        container.pack("4s", KfxContainer.SIGNATURE)
        container.pack("<H", KfxContainer.VERSION)
        header_len_pack = container.pack("<L", 0)
        container_info_offset_pack = container.pack("<L", 0)
        container_info_length_pack = container.pack("<L", 0)

        container_info = IonStruct()
        container_info[IS("$409")] = container_id
        container_info[IS("$410")] = DEFAULT_COMPRESSION_TYPE
        container_info[IS("$411")] = DEFAULT_DRM_SCHEME

        entity_data = Serializer()
        entity_table = Serializer()
        entity_offset = 0
        for entity in entities:
            serialized_entity = entity.serialize()
            entity_data.append(serialized_entity)
            entity_len = len(serialized_entity)
            entity_table.pack("<L", entity.id_idnum)
            entity_table.pack("<L", entity.type_idnum)
            entity_table.pack("<Q", entity_offset)
            entity_table.pack("<Q", entity_len)
            entity_offset += entity_len

        container_info[IS("$413")] = len(container)
        container_info[IS("$414")] = len(entity_table)
        container.append(entity_table.serialize())

        if doc_symbols is not None:
            doc_symbol_data = IonBinary(self.symtab).serialize_single_value(doc_symbols)
        else:
            doc_symbol_data = b""

        container_info[IS("$415")] = len(container)
        container_info[IS("$416")] = len(doc_symbol_data)
        container.append(doc_symbol_data)

        container_info[IS("$412")] = KfxContainer.DEFAULT_CHUNK_SIZE

        if format_capabilities is not None:
            format_capabilities_data = IonBinary(self.symtab).serialize_single_value(
                format_capabilities
            )
        else:
            format_capabilities_data = b""

        if self.symtab.local_min_id > 595:
            container_info[IS("$594")] = len(container)
            container_info[IS("$595")] = len(format_capabilities_data)
            container.append(format_capabilities_data)

        container_info_data = IonBinary(self.symtab).serialize_single_value(
            container_info
        )
        container.repack(container_info_length_pack, len(container_info_data))
        container.repack(container_info_offset_pack, len(container))
        container.append(container_info_data)

        kfxgen_info = [
            IonStruct("key", "kfxgen_package_version", "value", kfxgen_package_version),
            IonStruct(
                "key", "kfxgen_application_version", "value", kfxgen_application_version
            ),
            IonStruct(
                "key", "kfxgen_payload_sha1", "value", bytes_to_hex(entity_data.sha1())
            ),
            IonStruct("key", "kfxgen_acr", "value", container_id),
        ]
        container.append(
            json_serialize_compact(kfxgen_info)
            .replace(
                '"key":',
                "key:",
            )
            .replace('"value":', "value:")
            .encode("ascii")
        )

        container.repack(header_len_pack, len(container))

        container.extend(entity_data)

        return container.serialize()


class KfxContainerEntity(object):
    SIGNATURE = b"ENTY"
    VERSION = 1
    ALLOWED_VERSIONS = {1}
    MIN_LENGTH = 10

    def __init__(
        self, symtab, id_idnum=None, type_idnum=None, value=None, serialized_data=None
    ):
        self.symtab = symtab
        self.id_idnum = id_idnum
        self.type_idnum = type_idnum
        self.value = value
        self.serialized_data = serialized_data

    def deserialize(self, data=None):
        if data is None:
            data = self.serialized_data

        cont_entity = Deserializer(data)
        signature = cont_entity.unpack("4s")
        version = cont_entity.unpack("<H")
        header_len = cont_entity.unpack("<L")

        if signature != KfxContainerEntity.SIGNATURE:
            raise Exception(
                "Container entity signature is incorrect (%s)"
                % bytes_to_separated_hex(signature)
            )

        if version not in KfxContainerEntity.ALLOWED_VERSIONS:
            log.error("Container entity version is incorrect (%d)" % version)

        if header_len < KfxContainerEntity.MIN_LENGTH:
            raise Exception("Container entity header is too short (%d)" % header_len)

        self.header = data[:header_len]

        entity_info = IonBinary(self.symtab).deserialize_single_value(
            cont_entity.extract(upto=header_len)
        )
        compression_type = entity_info.pop("$410", DEFAULT_COMPRESSION_TYPE)
        drm_scheme = entity_info.pop("$411", DEFAULT_DRM_SCHEME)

        if compression_type != DEFAULT_COMPRESSION_TYPE:
            log.error(
                "Container entity %s has unexpected bcComprType: %s"
                % (repr(self), repr(compression_type))
            )

        if drm_scheme != DEFAULT_DRM_SCHEME:
            log.error(
                "Container entity %s has unexpected bcDRMScheme: %s"
                % (repr(self), repr(drm_scheme))
            )

        if len(entity_info):
            raise Exception(
                "Container entity %s info has extra data: %s"
                % (repr(self), repr(entity_info))
            )

        entity_data = cont_entity.extract()

        fid = self.symtab.get_symbol(self.id_idnum)
        ftype = self.symtab.get_symbol(self.type_idnum)

        if ftype in RAW_FRAGMENT_TYPES:
            self.value = IonBLOB(entity_data)
        else:
            self.value = IonBinary(self.symtab).deserialize_single_value(entity_data)

        if isinstance(self.value, IonAnnotation):
            if self.value.is_annotation(ftype) and fid == "$348":
                fid = ftype
                self.value = self.value.value
            else:
                log.error(
                    "Entity %s has IonAnnotation as value: %s"
                    % (repr(self), repr(self.value))
                )

        return YJFragment(
            fid=fid if fid != "$348" else None, ftype=ftype, value=self.value
        )

    def serialize(self):
        entity = Serializer()
        entity.pack("4s", KfxContainerEntity.SIGNATURE)
        entity.pack("<H", KfxContainerEntity.VERSION)
        header_len_pack = entity.pack("<L", 0)

        entity_info = IonStruct()
        entity_info[IS("$410")] = DEFAULT_COMPRESSION_TYPE
        entity_info[IS("$411")] = DEFAULT_DRM_SCHEME
        entity.append(IonBinary(self.symtab).serialize_single_value(entity_info))

        entity.repack(header_len_pack, len(entity))

        ftype = self.symtab.get_symbol(self.type_idnum)
        if ftype in RAW_FRAGMENT_TYPES:
            if isinstance(self.value, IonBLOB):
                entity.append(bytes(self.value))
            else:
                raise Exception(
                    "KfxContainerEntity %s must be IonBLOB, found %s"
                    % (ftype, type_name(self.value))
                )
        else:
            entity.append(IonBinary(self.symtab).serialize_single_value(self.value))

        return entity.serialize()

    def __repr__(self):
        return "$%d/$%d" % (self.type_idnum, self.id_idnum)
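Two small notes on the classes above, as a sketch. KfxContainerEntity carries plain id/type numbers plus either a value or serialized bytes, and KfxContainer's deserialize()/get_fragments()/serialize() form the round trip. A minimal check of the entity bookkeeping (the id numbers are illustrative; the constructor only stores its arguments, so no symbol table is needed for repr):

entity = KfxContainerEntity(None, id_idnum=348, type_idnum=260)
assert repr(entity) == "$260/$348"

# Typical wiring (symtab and datafile construction elided; see the other
# kfxlib modules added in this commit):
#   container = KfxContainer(symtab, datafile=datafile)
#   container.deserialize()
#   fragments = container.get_fragments()
#   rebuilt = container.serialize()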
693
kindle_download_helper/third_party/kfxlib/kpf_book.py
vendored
Normal file
@@ -0,0 +1,693 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import copy
import decimal
import re
import uuid

from .ion import (
    IS,
    IonAnnotation,
    IonFloat,
    IonList,
    IonSExp,
    IonString,
    IonStruct,
    IonSymbol,
    ion_type,
    isstring,
    unannotated,
)
from .message_logging import log
from .python_transition import IS_PYTHON2
from .utilities import font_file_ext
from .yj_container import YJFragment, YJFragmentKey
from .yj_structure import EID_REFERENCES, FORMAT_SYMBOLS, MAX_CONTENT_FRAGMENT_SIZE
from .yj_versions import GENERIC_CREATOR_VERSIONS, is_known_aux_metadata

if IS_PYTHON2:
    from .python_transition import str


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


FIX_BOOK = True
CREATE_CONTENT_FRAGMENTS = True
VERIFY_ORIGINAL_POSITION_MAP = False


SHORT_TOOL_NAME = {
    "Kindle Previewer 3": "KPR",
    "Kindle Create": "KC",
}


class KpfBook(object):
    def fix_kpf_prepub_book(self, fix_book, retain_yj_locals):
        self.retain_yj_locals = retain_yj_locals

        if len(self.yj_containers) != 1:
            raise Exception("A KPF book should have only one container")

        self.kpf_container = self.yj_containers[0]

        if not (fix_book and FIX_BOOK):
            return

        for fragment in self.fragments.get_all("$417"):
            orig_fid = fragment.fid
            fixed_fid = fix_resource_location(orig_fid)
            if fixed_fid != orig_fid:
                self.fragments.remove(fragment)
                self.fragments.append(
                    YJFragment(
                        ftype="$417",
                        fid=self.create_local_symbol(fixed_fid),
                        value=fragment.value,
                    )
                )

        for fragment in list(self.fragments):
            if fragment.ftype != "$270":
                self.kpf_fix_fragment(fragment)

        for fragment in self.fragments.get_all("$266"):
            if fragment.value.get("$183", {}).get("$143", None) == 0:
                fragment.value["$183"].pop("$143")

        fragment = self.fragments.get("$550")
        if fragment is not None:
            for lm in fragment.value:
                lm.pop("$178", None)

        fragment = self.fragments.get("$490")
        if fragment is not None:
            for category in ["kindle_audit_metadata", "kindle_title_metadata"]:
                for cm in fragment.value["$491"]:
                    if cm["$495"] == category:
                        break
                else:
                    fragment.value["$491"].append(
                        IonStruct(IS("$495"), category, IS("$258"), [])
                    )

            for cm in fragment.value["$491"]:
                if cm["$495"] == "kindle_audit_metadata":
                    if (
                        (
                            self.get_metadata_value(
                                "file_creator", category="kindle_audit_metadata"
                            ),
                            self.get_metadata_value(
                                "creator_version", category="kindle_audit_metadata"
                            ),
                        )
                        in GENERIC_CREATOR_VERSIONS
                    ) and self.kpf_container.kcb_data:
                        kcb_metadata = self.kpf_container.kcb_data.get("metadata", {})
                        tool_name = kcb_metadata.get("tool_name")
                        tool_version = kcb_metadata.get("tool_version")

                        if (
                            tool_name
                            and tool_version
                            and not tool_version.startswith("unknown")
                        ):
                            for metadata in cm["$258"]:
                                if metadata["$492"] == "file_creator":
                                    metadata["$307"] = SHORT_TOOL_NAME.get(
                                        tool_name, tool_name
                                    )

                                if metadata["$492"] == "creator_version":
                                    metadata["$307"] = tool_version

                elif cm["$495"] == "kindle_title_metadata":
                    if self.get_metadata_value("asset_id") is None:
                        cm["$258"].append(
                            IonStruct(
                                IS("$492"),
                                "asset_id",
                                IS("$307"),
                                self.create_container_id(),
                            )
                        )

                    if self.get_metadata_value("is_sample") is None:
                        cm["$258"].append(
                            IonStruct(IS("$492"), "is_sample", IS("$307"), False)
                        )

                    if (
                        self.get_metadata_value("language", default="")
                        .lower()
                        .startswith("ja-zh")
                    ):
                        for metadata in cm["$258"]:
                            if metadata["$492"] == "language":
                                metadata["$307"] = metadata["$307"][3:].replace(
                                    "=", "-"
                                )

                    if (
                        self.kpf_container.source_epub is not None
                        and len(self.kpf_container.source_epub.authors) > 1
                    ):
                        for i, md in reversed(list(enumerate(cm["$258"]))):
                            if md["$492"] == "author":
                                cm["$258"].pop(i)

                        for author in self.kpf_container.source_epub.authors:
                            cm["$258"].append(
                                IonStruct(IS("$492"), "author", IS("$307"), author)
                            )

                        if (
                            self.kpf_container.source_epub.issue_date
                            and self.get_metadata_value("issue_date") is None
                        ):
                            cm["$258"].append(
                                IonStruct(
                                    IS("$492"),
                                    "issue_date",
                                    IS("$307"),
                                    self.kpf_container.source_epub.issue_date,
                                )
                            )

                    if self.get_metadata_value("override_kindle_font") is None:
                        cm["$258"].append(
                            IonStruct(
                                IS("$492"), "override_kindle_font", IS("$307"), False
                            )
                        )

                    if (
                        self.get_metadata_value("cover_image") is None
                        and self.get_metadata_value(
                            "yj_fixed_layout", category="kindle_capability_metadata"
                        )
                        is not None
                    ):
                        cover_resource = self.locate_cover_image_resource_from_content()
                        if cover_resource is not None:
                            cm["$258"].append(
                                IonStruct(
                                    IS("$492"),
                                    "cover_image",
                                    IS("$307"),
                                    str(cover_resource),
                                )
                            )

        for fragment in self.fragments.get_all("$262"):
            if fragment.fid != "$262":
                self.fragments.remove(fragment)
                self.fragments.append(YJFragment(ftype="$262", value=fragment.value))

            location = fragment.value["$165"]
            font_data_fragment = self.fragments[
                YJFragmentKey(ftype="$417", fid=location)
            ]
            self.fragments.remove(font_data_fragment)
            self.fragments.append(
                YJFragment(
                    ftype="$418",
                    fid=self.create_local_symbol(location),
                    value=font_data_fragment.value,
                )
            )

        for fragment in self.fragments.get_all("$164"):
            fv = fragment.value
            if (
                fv.get("$161") == "$287"
                and "$422" not in fv
                and "$423" not in fv
                and "$167" in fv
            ):
                referred_resources = fv["$167"]
                for frag in self.fragments.get_all("$164"):
                    if (
                        frag.fid in referred_resources
                        and "$422" in frag.value
                        and "$423" in frag.value
                    ):
                        fv[IS("$422")] = frag.value["$422"]
                        fv[IS("$423")] = frag.value["$423"]
                        break

            if fv.get("$162") == "":
                fv.pop("$162")
                log.warning(
                    "Removed empty mime type from external_resource %s" % fv.get("$175")
                )

        cover_image_data = self.get_cover_image_data()
        if cover_image_data is not None:
            new_cover_image_data = self.fix_cover_image_data(cover_image_data)
            if new_cover_image_data != cover_image_data:
                self.set_cover_image_data(new_cover_image_data)

        canonical_format = (2, 0) if self.is_illustrated_layout else (1, 0)

        file_creator = self.get_metadata_value(
            "file_creator", category="kindle_audit_metadata", default=""
        )
        creator_version = self.get_metadata_value(
            "creator_version", category="kindle_audit_metadata", default=""
        )

        if (
            file_creator == "KC"
            or (file_creator == "KTC" and creator_version >= "1.11")
        ) and canonical_format < (2, 0):
            canonical_format = (2, 0)

        content_features = self.fragments.get("$585")
        if content_features is not None:
            content_features.value.pop("$155", None)
            content_features.value.pop("$598", None)
        else:
            content_features = YJFragment(ftype="$585", value=IonStruct(IS("$590"), []))
            self.fragments.append(content_features)

        features = content_features.value["$590"]

        def add_feature(feature, namespace="com.amazon.yjconversion", version=(1, 0)):
            if self.get_feature_value(feature, namespace=namespace) is None:
                features.append(
                    IonStruct(
                        IS("$586"),
                        namespace,
                        IS("$492"),
                        feature,
                        IS("$589"),
                        IonStruct(
                            IS("version"),
                            IonStruct(IS("$587"), version[0], IS("$588"), version[1]),
                        ),
                    )
                )

        def add_feature_from_metadata(
            metadata,
            feature,
            category="kindle_capability_metadata",
            namespace="com.amazon.yjconversion",
            version=(1, 0),
        ):
            if self.get_metadata_value(metadata, category=category) is not None:
                add_feature(feature, namespace, version)

        add_feature("CanonicalFormat", namespace="SDK.Marker", version=canonical_format)

        if self.is_fixed_layout:
            if self.has_pdf_resource:
                add_feature("yj_pdf_support")
                add_feature_from_metadata("yj_fixed_layout", "yj_fixed_layout")
            else:
                add_feature_from_metadata(
                    "yj_fixed_layout", "yj_non_pdf_fixed_layout", version=2
                )

        has_hdv_image = has_tiles = yj_jpg_rst_marker_present = False
        for fragment in self.fragments.get_all("$164"):
            fv = fragment.value
            if fv.get("$422", 0) > 1920 or fv.get("$423", 0) > 1920 or "$636" in fv:
                has_hdv_image = True

            if IS("$797") in fv:
                has_tiles = True

            if (not yj_jpg_rst_marker_present) and fv.get("$161") == "$285":
                location = fv.get("$165", None)
                if location is not None:
                    raw_media = self.fragments.get(
                        ftype="$417", fid=location, first=True
                    )
                    if raw_media is not None:
                        if re.search(b"\xff[\xd0-\xd7]", raw_media.value.tobytes()):
                            yj_jpg_rst_marker_present = True

        if not self.is_fixed_layout:
            if has_tiles:
                add_feature("yj_hdv", version=(2, 0))
            elif has_hdv_image:
                add_feature("yj_hdv")

        if yj_jpg_rst_marker_present:
            add_feature("yj_jpg_rst_marker_present")

        add_feature_from_metadata("graphical_highlights", "yj_graphical_highlights")
        add_feature_from_metadata("yj_textbook", "yj_textbook")

        if self.fragments.get("$389") is None:
            log.info("Adding book_navigation")

            book_navigation = []
            for reading_order_name in self.reading_order_names():
                book_nav = IonStruct()

                if reading_order_name:
                    book_nav[IS("$178")] = reading_order_name

                book_nav[IS("$392")] = []
                book_navigation.append(book_nav)

            self.fragments.append(YJFragment(ftype="$389", value=book_navigation))

        for book_navigation in self.fragments["$389"].value:
            pages = []
            nav_containers = book_navigation["$392"]
            has_page_list = False

            for nav_container in nav_containers:
                nav_container = unannotated(nav_container)
                nav_type = nav_container.get("$235", None)
                if nav_type == "$236":
                    entries = nav_container.get("$247", [])
                    i = 0
                    while i < len(entries):
                        entry = unannotated(entries[i])
                        label = entry.get("$241", {}).get("$244", "")
                        if label.startswith("page_list_entry:"):
                            seq, sep, text = label.partition(":")[2].partition(":")

                            pages.append(
                                (
                                    int(seq),
                                    IonAnnotation(
                                        [IS("$393")],
                                        IonStruct(
                                            IS("$241"),
                                            IonStruct(IS("$244"), text),
                                            IS("$246"),
                                            entry["$246"],
                                        ),
                                    ),
                                )
                            )

                            entries.pop(i)
                            i -= 1

                        i += 1
                elif nav_type == "$237":
                    log.info("KPF book contains a page list")
                    has_page_list = True

            if pages and not has_page_list:
                log.info(
                    "Transformed %d KFX landmark entries into a page list" % len(pages)
                )

                nav_containers.append(
                    IonAnnotation(
                        [IS("$391")],
                        IonStruct(
                            IS("$235"),
                            IS("$237"),
                            IS("$239"),
                            self.kpf_gen_uuid_symbol(),
                            IS("$247"),
                            [p[1] for p in sorted(pages)],
                        ),
                    )
                )

        if self.is_dictionary:
            self.is_kpf_prepub = False
        else:
            has_text_block = False
            if CREATE_CONTENT_FRAGMENTS:
                content_fragment_data = {}
                for section_name in self.ordered_section_names():
                    for story_name in self.extract_section_story_names(section_name):
                        self.kpf_collect_content_strings(
                            story_name, content_fragment_data
                        )

                for content_name, content_list in content_fragment_data.items():
                    has_text_block = True
                    self.fragments.append(
                        YJFragment(
                            ftype="$145",
                            fid=content_name,
                            value=IonStruct(
                                IS("name"), content_name, IS("$146"), content_list
                            ),
                        )
                    )
            else:
                log.warning("Content fragment creation is disabled")

            map_pos_info = self.collect_position_map_info()

            if VERIFY_ORIGINAL_POSITION_MAP:
                content_pos_info = self.collect_content_position_info()
                self.verify_position_info(content_pos_info, map_pos_info)

            if len(map_pos_info) < 10 and self.is_illustrated_layout:
                log.warning("creating position map (original is missing or incorrect)")
                map_pos_info = self.collect_content_position_info()

            self.is_kpf_prepub = False
            has_spim, has_position_id_offset = self.create_position_map(map_pos_info)

            has_yj_location_pid_map = False
            if self.fragments.get("$550") is None and not (
                self.is_print_replica or self.is_magazine
            ):
                loc_info = self.generate_approximate_locations(map_pos_info)
                has_yj_location_pid_map = self.create_location_map(loc_info)

            if self.fragments.get("$395") is None:
                self.fragments.append(
                    YJFragment(ftype="$395", value=IonStruct(IS("$247"), []))
                )

            for fragment in self.fragments.get_all("$593"):
                self.fragments.remove(fragment)

            fc = []

            if has_spim or has_yj_location_pid_map:
                fc.append(
                    IonStruct(IS("$492"), "kfxgen.positionMaps", IS("version"), 2)
                )

            if has_position_id_offset:
                fc.append(
                    IonStruct(IS("$492"), "kfxgen.pidMapWithOffset", IS("version"), 1)
                )

            if has_text_block:
                fc.append(IonStruct(IS("$492"), "kfxgen.textBlock", IS("version"), 1))

            self.fragments.append(YJFragment(ftype="$593", value=fc))

        for fragment in self.fragments.get_all("$597"):
            for kv in fragment.value.get("$258", []):
                key = kv.get("$492", "")
                value = kv.get("$307", "")
                if not is_known_aux_metadata(key, value):
                    log.warning("Unknown auxiliary_data: %s=%s" % (key, value))

        self.check_fragment_usage(rebuild=True, ignore_extra=True)

        self.check_symbol_table(rebuild=True, ignore_unused=True)

    def kpf_gen_uuid_symbol(self):
        return self.create_local_symbol(str(uuid.uuid4()))

    def kpf_fix_fragment(self, fragment):
        def _fix_ion_data(data, container):
            data_type = ion_type(data)

            if data_type is IonAnnotation:
                if data.is_annotation("$608"):
                    return _fix_ion_data(data.value, container)

                new_annot = [_fix_ion_data(annot, None) for annot in data.annotations]
                return IonAnnotation(new_annot, _fix_ion_data(data.value, container))

            if data_type is IonList:
                new_list = []
                for i, fc in enumerate(data):
                    if container == "$146" and isinstance(fc, IonSymbol):
                        structure = self.fragments.get(
                            YJFragmentKey(ftype="$608", fid=fc)
                        )
                        if structure is not None:
                            fc = copy.deepcopy(structure.value)

                    if (not self.is_dictionary) and (
                        (
                            fragment.ftype == "$609"
                            and container == "contains_list_"
                            and i == 1
                        )
                        or (
                            fragment.ftype == "$538"
                            and container == "yj.semantics.containers_with_semantics"
                        )
                    ):
                        fc = self.symbol_id(fc)

                    if container == "$181":
                        list_container = "contains_list_"
                    elif container == "$141":
                        list_container = "$141"
                    else:
                        list_container = None

                    new_list.append(_fix_ion_data(fc, list_container))

                return new_list

            if data_type is IonSExp:
                new_sexp = IonSExp()
                for fc in data:
                    new_sexp.append(_fix_ion_data(fc, None))

                return new_sexp

            if data_type is IonStruct:
                new_struct = IonStruct()
                for fk, fv in data.items():
                    fv = _fix_ion_data(fv, fk)

                    if not self.is_dictionary:
                        if fk == "$597":
                            continue

                        if fk == "$239":
                            self.create_local_symbol(str(fv))

                        if (
                            fk in EID_REFERENCES
                            and fragment.ftype != "$597"
                            and isinstance(fv, IonSymbol)
                        ):
                            if fk == "$598":
                                fk = IS("$155")

                            if (
                                fragment.ftype != "$610"
                                or self.fragments.get(ftype="$260", fid=fv) is None
                            ):
                                fv = self.symbol_id(fv)

                    if fk == "$161" and isstring(fv):
                        fv = IS(FORMAT_SYMBOLS[fv])

                    if (not self.retain_yj_locals) and (
                        fk.startswith("yj.authoring.")
                        or fk.startswith("yj.conversion.")
                        or fk.startswith("yj.print.")
                        or fk.startswith("yj.semantics.")
                        or fk == "$790"
                    ):
                        continue

                    if (
                        self.is_illustrated_layout
                        and fragment.ftype == "$260"
                        and container == "$141"
                        and fk in ["$67", "$66"]
                    ):
                        continue

                    if fk == "$165":
                        if ion_type(fv) is not IonString:
                            raise Exception("location is not IonString: %s" % fv)

                        fv = fix_resource_location(fv)

                    if fragment.ftype == "$157" and fk == "$173" and fv != fragment.fid:
                        log.info(
                            "Fixing incorrect name %s of style %s" % (fv, fragment.fid)
                        )
                        fv = fragment.fid

                    new_struct[_fix_ion_data(fk, None)] = fv

                return new_struct

            if data_type is IonFloat:
                dec = decimal.Decimal("%g" % data)
                if abs(dec) < 0.001:
                    dec = decimal.Decimal("0")

                return dec

            return data

        fragment.value = _fix_ion_data(fragment.value, None)

    def kpf_collect_content_strings(self, story_name, content_fragment_data):
        def _kpf_collect_content_strings(data):
            data_type = ion_type(data)

            if data_type is IonAnnotation:
                _kpf_collect_content_strings(data.value)

            elif data_type is IonList or data_type is IonSExp:
                for fc in data:
                    _kpf_collect_content_strings(fc)

            elif data_type is IonStruct:
                for fk, fv in data.items():
                    if fk == "$145" and isstring(fv):
                        if (
                            len(content_fragment_data) == 0
                            or self._content_fragment_size >= MAX_CONTENT_FRAGMENT_SIZE
                        ):
                            self._content_fragment_name = self.create_local_symbol(
                                "content_%d" % (len(content_fragment_data) + 1)
                            )
                            content_fragment_data[self._content_fragment_name] = []
                            self._content_fragment_size = 0

                        content_fragment_data[self._content_fragment_name].append(fv)
                        self._content_fragment_size += len(fv.encode("utf8"))

                        data[fk] = IonStruct(
                            IS("name"),
                            self._content_fragment_name,
                            IS("$403"),
                            len(content_fragment_data[self._content_fragment_name]) - 1,
                        )
                    else:
                        _kpf_collect_content_strings(fv)

        _kpf_collect_content_strings(
            self.fragments[YJFragmentKey(ftype="$259", fid=story_name)].value
        )

    def symbol_id(self, symbol):
        if symbol is None or isinstance(symbol, int):
            return symbol

        return self.symtab.get_id(symbol)

    def kpf_add_font_ext(self, filename, raw_font):
        ext = font_file_ext(raw_font)
        if not ext:
            log.warn("font %s has unknown type (possibly obfuscated)" % filename)

        return "%s%s" % (filename, ext)


def section_sort_key(reading_order, s):
    try:
        return (reading_order.index(s), s)
    except ValueError:
        return (len(reading_order), s)


def fix_resource_location(s):
    return s if s.startswith("resource/") else "resource/%s" % s
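The two module-level helpers above are pure functions, so their behavior can be pinned down directly (the section names here are illustrative):

assert fix_resource_location("cover.jpg") == "resource/cover.jpg"
assert fix_resource_location("resource/cover.jpg") == "resource/cover.jpg"

reading_order = ["s1", "s2"]
# Known sections sort by reading-order position; unknown ones sort last by name.
ordered = sorted(["s9", "s2", "s1"], key=lambda s: section_sort_key(reading_order, s))
assert ordered == ["s1", "s2", "s9"]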
572
kindle_download_helper/third_party/kfxlib/kpf_container.py
vendored
Normal file
@@ -0,0 +1,572 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import io
import os

try:
    import apsw

    have_apsw = True
except ImportError:
    import sqlite3

    have_apsw = False


from .ion import (
    IS,
    IonAnnotation,
    IonBLOB,
    IonInt,
    IonList,
    IonSExp,
    IonString,
    IonStruct,
    ion_type,
)
from .ion_binary import IonBinary
from .message_logging import log
from .original_source_epub import SourceEpub
from .python_transition import IS_PYTHON2
from .utilities import (
    ZIP_SIGNATURE,
    DataFile,
    Deserializer,
    KFXDRMError,
    bytes_to_separated_hex,
    json_deserialize,
    json_serialize,
    natural_sort_key,
    temp_filename,
)
from .yj_container import (
    CONTAINER_FORMAT_KPF,
    DRMION_SIGNATURE,
    ROOT_FRAGMENT_TYPES,
    YJContainer,
    YJFragment,
)
from .yj_symbol_catalog import SYSTEM_SYMBOL_TABLE

if IS_PYTHON2:
    from .python_transition import repr


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


DEBUG = False
RETAIN_KFX_ID_ANNOT = False

RESOURCE_DIRECTORY = "resources"
DICTIONARY_RULES_FILENAME = "DictionaryRules.ion"

SQLITE_SIGNATURE = b"SQLite format 3\0"


class KpfContainer(YJContainer):
    KPF_SIGNATURE = ZIP_SIGNATURE
    KDF_SIGNATURE = SQLITE_SIGNATURE
    db_timeout = 30

    def __init__(self, symtab, datafile=None, fragments=None, book=None):
        YJContainer.__init__(self, symtab, datafile=datafile, fragments=fragments)
        self.book = book

    def deserialize(self, ignore_drm=False):
        self.ignore_drm = ignore_drm
        self.fragments.clear()

        self.kpf_datafile = (
            self.kdf_datafile
        ) = self.kcb_datafile = self.kcb_data = self.source_epub = None

        if self.datafile.is_zipfile():
            self.kpf_datafile = self.datafile

            with self.kpf_datafile.as_ZipFile() as zf:
                for info in zf.infolist():
                    ext = os.path.splitext(info.filename)[1]
                    if ext == ".kdf":
                        self.kdf_datafile = DataFile(
                            info.filename, zf.read(info), self.kpf_datafile
                        )

                    elif ext == ".kdf-journal":
                        if len(zf.read(info)) > 0:
                            raise Exception(
                                "kdf-journal is not empty in %s"
                                % self.kpf_datafile.name
                            )

                    elif ext == ".kcb":
                        self.kcb_datafile = DataFile(
                            info.filename, zf.read(info), self.kpf_datafile
                        )
                        self.kcb_data = json_deserialize(self.kcb_datafile.get_data())

            if self.kdf_datafile is None:
                raise Exception("Failed to locate KDF within %s" % self.datafile.name)

        else:
            self.kdf_datafile = self.datafile

        unwrapped_kdf_datafile = SQLiteFingerprintWrapper(self.kdf_datafile).remove()

        db_filename = (
            unwrapped_kdf_datafile.name
            if unwrapped_kdf_datafile.is_real_file and not self.book.is_netfs
            else temp_filename("kdf", unwrapped_kdf_datafile.get_data())
        )

        if have_apsw:
            if natural_sort_key(apsw.sqlitelibversion()) < natural_sort_key("3.8.2"):
                raise Exception(
                    "SQLite version 3.8.2 or later is necessary in order to use a WITHOUT ROWID table. Found version %s"
                    % apsw.sqlitelibversion()
                )

            conn = apsw.Connection(db_filename)
        else:
            if sqlite3.sqlite_version_info < (3, 8, 2):
                raise Exception(
                    "SQLite version 3.8.2 or later is necessary in order to use a WITHOUT ROWID table. Found version %s"
                    % sqlite3.sqlite_version
                )

            conn = sqlite3.connect(db_filename, KpfContainer.db_timeout)

        cursor = conn.cursor()

        sql_list = cursor.execute(
            "SELECT sql FROM sqlite_master WHERE type='table';"
        ).fetchall()
        schema = set([x[0] for x in sql_list])

        dictionary_index_terms = set()
        first_head_word = ""
        INDEX_INFO_SCHEMA = (
            "CREATE TABLE index_info(namespace char(256), index_name char(256), property char(40), "
            "primary key (namespace, index_name)) without rowid"
        )

        if INDEX_INFO_SCHEMA in schema:
            schema.remove(INDEX_INFO_SCHEMA)
            self.book.is_dictionary = True
            for namespace, index_name, property in cursor.execute(
                "SELECT * FROM index_info;"
            ):
                if namespace != "dictionary" or property != "yj.dictionary.term":
                    log.error(
                        "unexpected index_info: namespace=%s, index_name=%s, property=%s"
                        % (namespace, index_name, property)
                    )

                table_name = "index_%s_%s" % (namespace, index_name)
                index_schema = (
                    "CREATE TABLE %s ([%s] char(256), id char(40), "
                    "primary key ([%s], id)) without rowid"
                ) % (table_name, property, property)

                if index_schema in schema:
                    schema.remove(index_schema)
                    num_entries = 0
                    index_words = set()
                    index_kfx_ids = set()

                    for dictionary_term, kfx_id in cursor.execute(
                        "SELECT * FROM %s;" % table_name
                    ):
                        num_entries += 1
                        dictionary_index_terms.add((dictionary_term, IS(kfx_id)))
                        index_words.add(dictionary_term)
                        index_kfx_ids.add(kfx_id)

                        if dictionary_term < first_head_word or not first_head_word:
                            first_head_word = dictionary_term

                    log.info(
                        "Dictionary %s table has %d entries with %d terms and %d definitions"
                        % (
                            table_name,
                            num_entries,
                            len(index_words),
                            len(index_kfx_ids),
                        )
                    )

                else:
                    log.error("KPF database is missing the '%s' table" % table_name)

        self.eid_symbol = {}
        KFXID_TRANSLATION_SCHEMA = "CREATE TABLE kfxid_translation(eid INTEGER, kfxid char(40), primary key(eid)) without rowid"
        if KFXID_TRANSLATION_SCHEMA in schema:
            schema.remove(KFXID_TRANSLATION_SCHEMA)
            for eid, kfx_id in cursor.execute("SELECT * FROM kfxid_translation;"):
                self.eid_symbol[eid] = self.create_local_symbol(kfx_id)

        self.element_type = {}
        FRAGMENT_PROPERTIES_SCHEMA = (
            "CREATE TABLE fragment_properties(id char(40), key char(40), value char(40), "
            "primary key (id, key, value)) without rowid"
        )
        if FRAGMENT_PROPERTIES_SCHEMA in schema:
            schema.remove(FRAGMENT_PROPERTIES_SCHEMA)
            for id, key, value in cursor.execute("SELECT * FROM fragment_properties;"):
                if key == "child":
                    pass
                elif key == "element_type":
                    self.element_type[id] = value
                else:
                    log.error(
                        "fragment_property has unknown key: id=%s key=%s value=%s"
                        % (id, key, value)
                    )

        self.max_eid_in_sections = None
        FRAGMENTS_SCHEMA = "CREATE TABLE fragments(id char(40), payload_type char(10), payload_value blob, primary key (id))"
        if FRAGMENTS_SCHEMA in schema:
            schema.remove(FRAGMENTS_SCHEMA)

            for id in ["$ion_symbol_table", "max_id"]:
                rows = cursor.execute(
                    "SELECT payload_value FROM fragments WHERE id = ? AND payload_type = 'blob';",
                    (id,),
                ).fetchall()
                if rows:
                    payload_data = self.prep_payload_blob(rows[0][0])
                    if payload_data is None:
                        pass
                    elif id == "$ion_symbol_table":
                        self.symtab.creating_yj_local_symbols = True
                        sym_import = IonBinary(self.symtab).deserialize_annotated_value(
                            payload_data,
                            expect_annotation="$ion_symbol_table",
                            import_symbols=True,
                        )
                        self.symtab.creating_yj_local_symbols = False
                        if DEBUG:
                            log.info(
                                "kdf symbol import = %s" % json_serialize(sym_import)
                            )

                        self.fragments.append(YJFragment(sym_import))
                        break
                    else:
                        max_id = IonBinary(self.symtab).deserialize_single_value(
                            payload_data
                        )
                        if DEBUG:
                            log.info("kdf max_id = %d" % max_id)

                        self.symtab.clear()
                        self.symtab.import_shared_symbol_table(
                            "YJ_symbols",
                            max_id=max_id - len(SYSTEM_SYMBOL_TABLE.symbols),
                        )
                        self.fragments.append(YJFragment(self.symtab.create_import()))

            for id, payload_type, payload_value in cursor.execute(
                "SELECT * FROM fragments;"
            ):
                ftype = id

                if payload_type == "blob":
                    payload_data = self.prep_payload_blob(payload_value)

                    if id in ["max_id", "$ion_symbol_table"]:
                        pass

                    elif payload_data is None:
                        ftype = self.element_type.get(id)

                    elif id == "max_eid_in_sections":
                        ftype = None
                        self.max_eid_in_sections = IonBinary(
                            self.symtab
                        ).deserialize_single_value(payload_data)
                        if self.book.is_dictionary:
                            pass
                        else:
                            log.warning(
                                "Unexpected max_eid_in_sections for non-dictionary: %d"
                                % self.max_eid_in_sections
                            )

                    elif not payload_data.startswith(IonBinary.SIGNATURE):
                        ftype = None
                        self.fragments.append(
                            YJFragment(
                                ftype="$417",
                                fid=self.create_local_symbol(id),
                                value=IonBLOB(payload_data),
                            )
                        )

                    elif len(payload_data) == len(IonBinary.SIGNATURE):
                        if id != "book_navigation":
                            log.warning("Ignoring empty %s fragment" % id)

                    else:
                        value = IonBinary(self.symtab).deserialize_annotated_value(
                            payload_data
                        )

                        if not isinstance(value, IonAnnotation):
                            log.error(
                                "KDF fragment id=%s is missing annotation: %s"
                                % (id, repr(value))
                            )
                            continue
                        elif (
                            len(value.annotations) == 2
                            and value.annotations[1] == "$608"
                        ):
                            pass
                        elif len(value.annotations) > 1:
                            log.error(
                                "KDF fragment should have one annotation: %s"
                                % repr(value)
                            )

                        ftype = value.annotations[0]

                        if (
                            ftype in ROOT_FRAGMENT_TYPES
                        ):  # shortcut when symbol table unavailable
                            fid = None
                        else:
                            fid = self.create_local_symbol(id)

                        self.fragments.append(
                            YJFragment(
                                ftype=ftype,
                                fid=fid,
                                value=self.deref_kfx_ids(value.value),
)
|
||||
)
|
||||
|
||||
elif payload_type == "path":
|
||||
ftype = "$417"
|
||||
|
||||
resource_data = self.get_resource_data(
|
||||
self.prep_payload_blob(payload_value).decode("utf8")
|
||||
)
|
||||
if resource_data is not None:
|
||||
self.fragments.append(
|
||||
YJFragment(
|
||||
ftype=ftype,
|
||||
fid=self.create_local_symbol(id),
|
||||
value=IonBLOB(resource_data),
|
||||
)
|
||||
)
|
||||
|
||||
else:
|
||||
log.error(
|
||||
"Unexpected KDF payload_type=%s, id=%s, value=%d bytes"
|
||||
% (payload_type, id, len(payload_value))
|
||||
)
|
||||
|
||||
else:
|
||||
log.error("KPF database is missing the 'fragments' table")
|
||||
|
||||
GC_FRAGMENT_PROPERTIES_SCHEMA = (
|
||||
"CREATE TABLE gc_fragment_properties(id varchar(40), key varchar(40), "
|
||||
"value varchar(40), primary key (id, key, value)) without rowid"
|
||||
)
|
||||
if GC_FRAGMENT_PROPERTIES_SCHEMA in schema:
|
||||
schema.remove(GC_FRAGMENT_PROPERTIES_SCHEMA)
|
||||
|
||||
GC_REACHABLE_SCHEMA = (
|
||||
"CREATE TABLE gc_reachable(id varchar(40), primary key (id)) without rowid"
|
||||
)
|
||||
if GC_REACHABLE_SCHEMA in schema:
|
||||
schema.remove(GC_REACHABLE_SCHEMA)
|
||||
|
||||
CAPABILITIES_SCHEMA = "CREATE TABLE capabilities(key char(20), version smallint, primary key (key, version)) without rowid"
|
||||
if CAPABILITIES_SCHEMA in schema:
|
||||
schema.remove(CAPABILITIES_SCHEMA)
|
||||
capabilities = cursor.execute("SELECT * FROM capabilities;").fetchall()
|
||||
|
||||
if capabilities:
|
||||
format_capabilities = [
|
||||
IonStruct(IS("$492"), key, IS("version"), version)
|
||||
for key, version in capabilities
|
||||
]
|
||||
self.fragments.append(
|
||||
YJFragment(ftype="$593", value=format_capabilities)
|
||||
)
|
||||
else:
|
||||
log.error("KPF database is missing the 'capabilities' table")
|
||||
|
||||
if len(schema) > 0:
|
||||
for s in list(schema):
|
||||
log.error("Unexpected KDF database schema: %s" % s)
|
||||
|
||||
cursor.close()
|
||||
conn.close()
|
||||
|
||||
self.book.is_kpf_prepub = True
|
||||
book_metadata_fragment = self.fragments.get("$490")
|
||||
if book_metadata_fragment is not None:
|
||||
for cm in book_metadata_fragment.value.get("$491", {}):
|
||||
if cm.get("$495", "") == "kindle_title_metadata":
|
||||
for kv in cm.get("$258", []):
|
||||
if kv.get("$492", "") in [
|
||||
"ASIN",
|
||||
"asset_id",
|
||||
"cde_content_type",
|
||||
"content_id",
|
||||
]:
|
||||
self.book.is_kpf_prepub = False
|
||||
break
|
||||
break
|
||||
|
||||
self.fragments.append(
|
||||
YJFragment(
|
||||
ftype="$270",
|
||||
value=IonStruct(
|
||||
IS("$587"), "", IS("$588"), "", IS("$161"), CONTAINER_FORMAT_KPF
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
if self.kcb_datafile is not None and self.kcb_data is not None:
|
||||
source_path = self.kcb_data.get("metadata", {}).get("source_path")
|
||||
if source_path and os.path.splitext(source_path)[1] in [".epub", ".zip"]:
|
||||
epub_file = self.get_kpf_file(source_path)
|
||||
if epub_file is not None:
|
||||
zip_file = io.BytesIO(epub_file.get_data())
|
||||
self.source_epub = SourceEpub(zip_file)
|
||||
zip_file.close()
|
||||
|
||||
def prep_payload_blob(self, data):
|
||||
data = io.BytesIO(data).read()
|
||||
|
||||
if not data.startswith(DRMION_SIGNATURE):
|
||||
return data
|
||||
|
||||
if self.ignore_drm:
|
||||
return None
|
||||
|
||||
raise KFXDRMError("Book container has DRM and cannot be converted")
|
||||
|
||||
def create_local_symbol(self, symbol):
|
||||
return self.book.create_local_symbol(symbol)
|
||||
|
||||
def get_resource_data(self, filename, report_missing=True):
|
||||
try:
|
||||
resource_datafile = self.kdf_datafile.relative_datafile(filename)
|
||||
return resource_datafile.get_data()
|
||||
except Exception:
|
||||
if report_missing:
|
||||
log.error("Missing resource in KPF: %s" % filename)
|
||||
|
||||
return None
|
||||
|
||||
def get_kpf_file(self, filename, report_missing=True):
|
||||
try:
|
||||
return self.kcb_datafile.relative_datafile(filename)
|
||||
except Exception:
|
||||
if report_missing:
|
||||
log.error("Missing file in KPF: %s" % filename)
|
||||
|
||||
return None
|
||||
|
||||
def deref_kfx_ids(self, data):
|
||||
def process(data):
|
||||
data_type = ion_type(data)
|
||||
|
||||
if data_type is IonAnnotation:
|
||||
if data.is_annotation("$598"):
|
||||
val = data.value
|
||||
val_type = ion_type(val)
|
||||
|
||||
if val_type is IonString:
|
||||
return self.create_local_symbol(val)
|
||||
elif val_type is IonInt:
|
||||
value = self.eid_symbol.get(val)
|
||||
if value is not None:
|
||||
return value
|
||||
else:
|
||||
log.error("Undefined kfx_id annotation eid: %d" % val)
|
||||
else:
|
||||
log.error(
|
||||
"Unexpected data type for kfx_id annotation: %s" % val_type
|
||||
)
|
||||
|
||||
return val
|
||||
|
||||
process(data.value)
|
||||
|
||||
if data_type is IonList or data_type is IonSExp:
|
||||
for i, val in enumerate(list(data)):
|
||||
new_val = process(val)
|
||||
if new_val is not None:
|
||||
data.pop(i)
|
||||
data.insert(i, new_val)
|
||||
|
||||
if data_type is IonStruct:
|
||||
for key, val in data.items():
|
||||
new_val = process(val)
|
||||
if new_val is not None:
|
||||
data[key] = new_val
|
||||
|
||||
return None
|
||||
|
||||
if not RETAIN_KFX_ID_ANNOT:
|
||||
process(data)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
class SQLiteFingerprintWrapper(object):
|
||||
FINGERPRINT_OFFSET = 1024
|
||||
FINGERPRINT_RECORD_LEN = 1024
|
||||
DATA_RECORD_LEN = 1024
|
||||
DATA_RECORD_COUNT = 1024
|
||||
|
||||
FINGERPRINT_SIGNATURE = b"\xfa\x50\x0a\x5f"
|
||||
|
||||
def __init__(self, datafile):
|
||||
self.datafile = datafile
|
||||
|
||||
def remove(self):
|
||||
data = self.datafile.get_data()
|
||||
|
||||
if (
|
||||
len(data) < self.FINGERPRINT_OFFSET + self.FINGERPRINT_RECORD_LEN
|
||||
or data[
|
||||
self.FINGERPRINT_OFFSET : self.FINGERPRINT_OFFSET
|
||||
+ len(self.FINGERPRINT_SIGNATURE)
|
||||
]
|
||||
!= self.FINGERPRINT_SIGNATURE
|
||||
):
|
||||
return self.datafile
|
||||
|
||||
fingerprint_count = 0
|
||||
data_offset = self.FINGERPRINT_OFFSET
|
||||
|
||||
while len(data) >= data_offset + self.FINGERPRINT_RECORD_LEN:
|
||||
fingerprint = Deserializer(
|
||||
data[data_offset : data_offset + self.FINGERPRINT_RECORD_LEN]
|
||||
)
|
||||
|
||||
signature = fingerprint.extract(4)
|
||||
if signature != self.FINGERPRINT_SIGNATURE:
|
||||
log.error(
|
||||
"Unexpected fingerprint %d signature: %s"
|
||||
% (fingerprint_count, bytes_to_separated_hex(signature))
|
||||
)
|
||||
return self.datafile
|
||||
|
||||
data = (
|
||||
data[:data_offset] + data[data_offset + self.FINGERPRINT_RECORD_LEN :]
|
||||
)
|
||||
fingerprint_count += 1
|
||||
data_offset += self.DATA_RECORD_LEN * self.DATA_RECORD_COUNT
|
||||
|
||||
log.info("Removed %d KDF SQLite file fingerprint(s)" % fingerprint_count)
|
||||
|
||||
return DataFile(self.datafile.name + "-unwrapped", data)
|
||||
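
Note: a minimal sketch of driving the fingerprint stripper above by hand (kfxlib normally does this internally from KpfContainer; the file name is hypothetical and the import path assumes this repo's vendored layout):

from kindle_download_helper.third_party.kfxlib.kpf_container import (
    SQLiteFingerprintWrapper,
)
from kindle_download_helper.third_party.kfxlib.utilities import DataFile

datafile = DataFile("book.kdf")  # hypothetical KDF extracted from a KPF
unwrapped = SQLiteFingerprintWrapper(datafile).remove()
# remove() returns the original DataFile untouched when no fingerprint is
# present; otherwise it splices out the 1024-byte fingerprint records and
# returns a new "<name>-unwrapped" DataFile that sqlite3/apsw can open.
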
33
kindle_download_helper/third_party/kfxlib/message_logging.py
vendored
Normal file
@@ -0,0 +1,33 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import logging
import threading

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


thread_local_cfg = threading.local()


def set_logger(logger=None):
    global thread_local_cfg

    if logger is not None:  # test the argument; the module-level "log" object below is never None
        thread_local_cfg.logger = logger
    else:
        del thread_local_cfg.logger

    return logger


def get_current_logger():
    return getattr(thread_local_cfg, "logger", logging)


class LogCurrent(object):
    def __getattr__(self, method_name):
        return getattr(get_current_logger(), method_name)


log = LogCurrent()
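
The module above gives kfxlib a thread-local logger indirection: every log.x(...) call in the vendored code resolves to the current thread's logger at call time. A small usage sketch (the logger name is arbitrary):

import logging

from kindle_download_helper.third_party.kfxlib import message_logging

message_logging.set_logger(logging.getLogger("kfx"))  # affects this thread only
message_logging.log.info("routed to the 'kfx' logger")
message_logging.set_logger()  # unset; log falls back to the logging module
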
1426
kindle_download_helper/third_party/kfxlib/original_source_epub.py
vendored
Normal file
File diff suppressed because it is too large
141
kindle_download_helper/third_party/kfxlib/python_transition.py
vendored
Normal file
@@ -0,0 +1,141 @@
from __future__ import absolute_import, division, print_function, unicode_literals

"""
from .python_transition import (IS_PYTHON2, bytes_, bytes_indexed, bytes_to_hex, bytes_to_list)
if IS_PYTHON2:
    from .python_transition import (chr, html, http, repr, str, urllib)
else:
    import html
    import html.parser
    import html.entities
    import http.client
    import http.cookiejar
    import urllib.request
    import urllib.parse
"""


import sys

IS_PYTHON2 = sys.version_info[0] == 2

if IS_PYTHON2:
    import cgi
    from urllib import quote, quote_plus, unquote, urlencode

    import cookielib
    import htmlentitydefs
    import HTMLParser
    import httplib
    from urllib2 import (
        HTTPCookieProcessor,
        HTTPError,
        HTTPHandler,
        HTTPRedirectHandler,
        HTTPSHandler,
        Request,
        build_opener,
    )
    from urlparse import parse_qs, urljoin, urlparse, urlunparse

    class Object(object):
        pass

    html = Object()
    html.entities = htmlentitydefs
    html.escape = cgi.escape
    html.parser = HTMLParser
    html.unescape = HTMLParser.HTMLParser().unescape

    http = Object()
    http.client = httplib
    http.cookiejar = cookielib

    parse = Object()
    parse.parse_qs = parse_qs
    parse.quote = quote
    parse.quote_plus = quote_plus
    parse.unquote = unquote
    parse.urlencode = urlencode
    parse.urljoin = urljoin
    parse.urlparse = urlparse
    parse.urlunparse = urlunparse

    request = Object()
    request.build_opener = build_opener
    request.HTTPCookieProcessor = HTTPCookieProcessor
    request.HTTPError = HTTPError
    request.HTTPHandler = HTTPHandler
    request.HTTPSHandler = HTTPSHandler
    request.HTTPRedirectHandler = HTTPRedirectHandler
    request.Request = Request

    urllib = Object()
    urllib.parse = parse
    urllib.request = request

    try:
        unicode
        unichr
    except NameError:
        unicode = unichr = None

    py2_chr = chr
    str = unicode
    chr = unichr

    def repr(obj):
        return obj.__repr__()

    class bytes_(bytes):
        def __new__(cls, x):
            if isinstance(x, bytes):
                return x

            if isinstance(x, bytearray):
                return bytes(x)

            if isinstance(x, int):
                return b"\x00" * x

            if isinstance(x, list):
                return b"".join(py2_chr(i) for i in x)

            raise TypeError("Cannot convert %s to bytes" % type(x).__name__)

        @staticmethod
        def fromhex(s):
            if not isinstance(s, str):
                raise TypeError("fromhex %s" % type(s).__name__)

            return s.decode("hex")

    def bytes_indexed(b, i):
        if not isinstance(b, bytes):
            raise TypeError("bytes_indexed %s" % type(b).__name__)

        return ord(b[i])

    def bytes_to_hex(b):
        if not isinstance(b, bytes):
            raise TypeError("bytes_to_hex %s" % type(b).__name__)

        return b.encode("hex").decode("ascii")

    def bytes_to_list(b):
        if not isinstance(b, bytes):
            raise TypeError("bytes_to_list %s" % type(b).__name__)

        return [ord(c) for c in list(b)]

else:
    bytes_ = bytes

    def bytes_indexed(b, i):
        return b[i]

    def bytes_to_hex(b):
        return b.hex()

    def bytes_to_list(data):
        return list(data)
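
These shims give byte-string operations one spelling across Python 2 and 3; for example, on Python 3:

from kindle_download_helper.third_party.kfxlib.python_transition import (
    bytes_indexed,
    bytes_to_hex,
    bytes_to_list,
)

assert bytes_indexed(b"\x01\x02", 0) == 1       # an int on both interpreters
assert bytes_to_hex(b"\xfaP\n_") == "fa500a5f"  # the KDF fingerprint signature
assert bytes_to_list(b"ab") == [97, 98]
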
192
kindle_download_helper/third_party/kfxlib/unpack_container.py
vendored
Normal file
@@ -0,0 +1,192 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import io
import posixpath
import zipfile

from .ion import IonAnnotation, IonBLOB
from .ion_text import IonText
from .message_logging import log
from .utilities import (
    EXTS_OF_MIMETYPE,
    DataFile,
    font_file_ext,
    image_file_ext,
    json_serialize,
    type_name,
)
from .yj_container import YJContainer, YJFragment
from .yj_structure import SYMBOL_FORMATS

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


class IonTextContainer(YJContainer):
    def deserialize(self, ignore_drm=False):
        self.fragments.clear()
        for annot in IonText(self.symtab).deserialize_multiple_values(
            self.datafile.get_data(), import_symbols=True
        ):
            if not isinstance(annot, IonAnnotation):
                raise Exception(
                    "deserialize kfx ion text expected IonAnnotation but found %s"
                    % type_name(annot)
                )

            self.fragments.append(YJFragment(annot))

    def serialize(self):
        return IonText(self.symtab).serialize_multiple_values(self.get_fragments())


class ZipUnpackContainer(YJContainer):
    ADDED_EXT_FLAG_CHAR = "."

    def deserialize(self, ignore_drm=False):
        with self.datafile.as_ZipFile() as zf:
            for info in zf.infolist():
                if info.filename == "book.ion":
                    IonTextContainer(
                        self.symtab,
                        datafile=DataFile(info.filename, data=zf.read(info)),
                        fragments=self.fragments,
                    ).deserialize()
                    break
            else:
                raise Exception("book.ion file missing from ZipUnpackContainer")

            fonts = set()
            for fragment in self.fragments:
                if fragment.ftype == "$262":
                    fonts.add(fragment.value.get("$165"))

            for info in zf.infolist():
                if info.filename != "book.ion" and not info.filename.endswith("/"):
                    fn, ext = posixpath.splitext(info.filename)

                    fid = (
                        fn[:-1]
                        if ext and fn.endswith(self.ADDED_EXT_FLAG_CHAR)
                        else info.filename
                    )

                    self.fragments.append(
                        YJFragment(
                            ftype=("$418" if fid in fonts else "$417"),
                            fid=fid,
                            value=IonBLOB(zf.read(info)),
                        )
                    )

    def serialize(self):
        desired_extension = {}
        for fragment in self.fragments.get_all("$164"):
            location = fragment.value.get("$165", "")
            extension = posixpath.splitext(location)[1]

            if not extension:
                format = fragment.value.get("$161")
                if format in SYMBOL_FORMATS:
                    extension = "." + SYMBOL_FORMATS[format]

            if extension in ["", ".pobject"]:
                mime = fragment.value.get("$162")

                if mime in EXTS_OF_MIMETYPE and mime != "figure":
                    extension = EXTS_OF_MIMETYPE[mime][0]

            if extension:
                if location:
                    desired_extension[location] = extension

                if "$636" in fragment.value:
                    for tile_row in fragment.value["$636"]:
                        for tile_location in tile_row:
                            desired_extension[tile_location] = extension

        zfile = io.BytesIO()

        with zipfile.ZipFile(zfile, "w", compression=zipfile.ZIP_DEFLATED) as zf:
            zf.writestr(
                "book.ion",
                IonTextContainer(
                    self.symtab, fragments=self.fragments.filtered(omit_resources=True)
                ).serialize(),
            )

            for ftype in ["$417", "$418"]:
                for fragment in self.fragments.get_all(ftype):
                    fn = fragment.fid.tostring()

                    if not posixpath.splitext(fn)[1]:
                        if ftype == "$417":
                            if fn in desired_extension:
                                fn += self.ADDED_EXT_FLAG_CHAR + desired_extension[fn]
                            else:
                                extension = image_file_ext(fragment.value)
                                if extension:
                                    fn += self.ADDED_EXT_FLAG_CHAR + extension
                        else:
                            extension = font_file_ext(fragment.value)
                            if extension:
                                fn += self.ADDED_EXT_FLAG_CHAR + extension

                    zf.writestr(fn, bytes(fragment.value))

        data = zfile.getvalue()
        zfile.close()

        return data


class JsonContentContainer(object):
    VERSION = "1.1"
    TYPE_TEXT = 1
    TYPE_IMAGE = 2
    TYPE_HTML = 8

    def __init__(self, book):
        self.book = book

    def serialize(self):
        content_pos_info = self.book.collect_content_position_info()
        data = []
        next_pid = 0

        for chunk in content_pos_info:
            if chunk.pid != next_pid:
                log.error(
                    "next PID is %d but expected %d: %s"
                    % (chunk.pid, next_pid, repr(chunk))
                )
                next_pid = chunk.pid

            if chunk.text is not None:
                if len(chunk.text) != chunk.length:
                    log.error(
                        "chunk length %d but have %d characters: %s"
                        % (chunk.length, len(chunk.text), repr(chunk))
                    )

                entry = {}
                entry["content"] = chunk.text
                entry["position"] = chunk.pid
                entry["type"] = self.TYPE_TEXT
                data.append(entry)
            elif chunk.image_resource is not None:
                if chunk.length != 1:
                    log.error(
                        "chunk length %d for image: %s" % (chunk.length, repr(chunk))
                    )

                entry = {}
                entry["content"] = chunk.image_resource
                entry["position"] = chunk.pid
                entry["type"] = self.TYPE_IMAGE
                data.append(entry)

            next_pid += chunk.length

        content = {"data": data, "version": self.VERSION}
        return json_serialize(content, sort_keys=True, indent=2).encode("utf-8")
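
For orientation, a sketch of what JsonContentContainer produces (book here stands for a decoded YJ_Book instance, defined in yj_book.py below):

raw = JsonContentContainer(book).serialize()
# raw is UTF-8 JSON of the form:
#   {"data": [{"content": ..., "position": <pid>, "type": 1 or 2}, ...],
#    "version": "1.1"}
# where type 1 entries carry text and type 2 entries name an image resource.
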
1182
kindle_download_helper/third_party/kfxlib/utilities.py
vendored
Normal file
File diff suppressed because it is too large
3
kindle_download_helper/third_party/kfxlib/version.py
vendored
Normal file
@@ -0,0 +1,3 @@
from __future__ import absolute_import, division, print_function, unicode_literals

__version__ = "20220215"
378
kindle_download_helper/third_party/kfxlib/yj_book.py
vendored
Normal file
@@ -0,0 +1,378 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import os
import posixpath
import traceback

from .ion_symbol_table import LocalSymbolTable, SymbolTableCatalog
from .ion_text import IonText
from .kfx_container import MAX_KFX_CONTAINER_SIZE, KfxContainer
from .kpf_book import KpfBook
from .kpf_container import KpfContainer
from .message_logging import log
from .python_transition import IS_PYTHON2
from .unpack_container import IonTextContainer, JsonContentContainer, ZipUnpackContainer
from .utilities import (
    ZIP_SIGNATURE,
    DataFile,
    KFXDRMError,
    bytes_to_separated_hex,
    file_read_utf8,
    flush_unicode_cache,
    temp_file_cleanup,
)
from .yj_container import YJFragmentList
from .yj_metadata import BookMetadata
from .yj_position_location import BookPosLoc
from .yj_structure import BookStructure
from .yj_symbol_catalog import YJ_SYMBOLS, IonSharedSymbolTable

if IS_PYTHON2:
    from .python_transition import repr


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


class YJ_Book(BookStructure, BookPosLoc, BookMetadata, KpfBook):
    def __init__(
        self, file, credentials=[], is_netfs=False, symbol_catalog_filename=None
    ):
        self.datafile = DataFile(file)
        self.credentials = credentials
        self.is_netfs = is_netfs
        self.symbol_catalog_filename = symbol_catalog_filename
        self.reported_errors = set()
        self.symtab = LocalSymbolTable(YJ_SYMBOLS.name)
        self.fragments = YJFragmentList()
        self.reported_missing_fids = set()
        self.is_kpf_prepub = self.is_dictionary = False
        self.yj_containers = []
        self.kpf_container = None

        self.load_symbol_catalog()

    def load_symbol_catalog(self):
        if self.symbol_catalog_filename is not None:
            if not os.path.isfile(self.symbol_catalog_filename):
                raise Exception(
                    "Symbol catalog %s does not exist" % self.symbol_catalog_filename
                )

            translation_catalog = SymbolTableCatalog()
            catalog_symtab = LocalSymbolTable(catalog=translation_catalog)

            try:
                IonText(catalog_symtab).deserialize_multiple_values(
                    file_read_utf8(self.symbol_catalog_filename), import_symbols=True
                )
            except Exception:
                log.error(
                    "Failed to parse symbol catalog %s" % self.symbol_catalog_filename
                )
                raise

            translation_symtab = translation_catalog.get_shared_symbol_table(
                YJ_SYMBOLS.name
            )
            if translation_symtab is None:
                raise Exception(
                    "Symbol catalog %s does not contain a definition for YJ_symbols"
                    % self.symbol_catalog_filename
                )

            catalog_symtab.report()
            log.info(
                "Symbol catalog defines %d symbols in YJ_symbols"
                % len(translation_symtab.symbols)
            )
        else:
            translation_symtab = IonSharedSymbolTable(YJ_SYMBOLS.name)

        self.symtab.set_translation(translation_symtab)

    def final_actions(self, do_symtab_report=True):
        if do_symtab_report:
            self.symtab.report()

        flush_unicode_cache()
        temp_file_cleanup()

    def convert_to_single_kfx(self):
        self.decode_book()

        if self.is_dictionary:
            raise Exception("Cannot serialize dictionary as KFX container")

        if self.is_kpf_prepub:
            raise Exception("Cannot serialize KPF as KFX container without fix-up")

        result = KfxContainer(self.symtab, fragments=self.fragments).serialize()

        if len(result) > MAX_KFX_CONTAINER_SIZE:
            log.warning(
                "KFX container created may be too large for some devices (%d bytes)"
                % len(result)
            )

        self.final_actions()
        return result

    def convert_to_epub(self, epub2_desired=False):
        from .yj_to_epub import KFX_EPUB

        self.decode_book()
        result = KFX_EPUB(self, epub2_desired).decompile_to_epub()
        self.final_actions()
        return result

    def convert_to_pdf(self):
        from .yj_to_pdf import KFX_PDF

        self.decode_book()

        if self.has_pdf_resource:
            result = KFX_PDF(self).extract_pdf_resources()
        elif self.is_fixed_layout:
            result = KFX_PDF(self).convert_image_resources()
        else:
            result = None

        self.final_actions()
        return result

    def get_metadata(self):
        self.locate_book_datafiles()

        yj_datafile_containers = []
        for datafile in self.container_datafiles:
            try:
                container = self.get_container(datafile, ignore_drm=True)
                if container is not None:
                    container.deserialize(ignore_drm=True)
                    yj_datafile_containers.append((datafile, container))

            except Exception as e:
                log.warning(
                    "Failed to extract content from %s: %s" % (datafile.name, repr(e))
                )

        for datafile, container in yj_datafile_containers:
            try:
                self.fragments.extend(container.get_fragments())

            except Exception as e:
                log.warning(
                    "Failed to extract content from %s: %s" % (datafile.name, repr(e))
                )
                continue

            if self.has_metadata() and self.has_cover_data():
                break

        if not self.has_metadata():
            raise Exception("Failed to locate a KFX container with metadata")

        self.final_actions(do_symtab_report=False)
        return self.get_yj_metadata_from_book()

    def convert_to_kpf(
        self, conversion=None, flags=None, timeout_sec=None, cleaned_filename=None
    ):
        from .generate_kpf_common import ConversionResult
        from .generate_kpf_using_cli import KPR_CLI

        if not self.datafile.is_real_file:
            raise Exception("Cannot create KPF from stream")

        infile = self.datafile.name
        intype = os.path.splitext(infile)[1]

        if not conversion:
            conversion = "KPR_CLI"

        flags = set() if flags is None else set(flags)

        options = conversion.split("/")
        conversion_name = options[0]
        flags |= set(options[1:])

        ALL_TYPES = [".doc", ".docx", ".epub", ".mobi", ".opf"]

        if conversion_name == "KPR_CLI" and intype in ALL_TYPES:
            conversion_sequence = KPR_CLI()
        else:
            return ConversionResult(
                error_msg="Cannot generate KPF from %s file using %s"
                % (intype, conversion_name)
            )

        try:
            result = conversion_sequence.convert(
                infile, flags, timeout_sec, cleaned_filename
            )
        except Exception as e:
            traceback.print_exc()
            result = ConversionResult(error_msg=repr(e))

        self.final_actions(do_symtab_report=False)
        return result

    def convert_to_zip_unpack(self):
        self.decode_book()
        result = ZipUnpackContainer(self.symtab, fragments=self.fragments).serialize()
        self.final_actions()
        return result

    def convert_to_json_content(self):
        self.decode_book()
        result = JsonContentContainer(self).serialize()
        self.final_actions()
        return result

    def decode_book(
        self,
        set_metadata=None,
        set_approximate_pages=None,
        pure=False,
        retain_yj_locals=False,
    ):
        if self.fragments:
            if (
                set_metadata is not None
                or set_approximate_pages is not None
                or retain_yj_locals
            ):
                raise Exception(
                    "Attempt to change metadata after book has already been decoded"
                )
            return

        self.locate_book_datafiles()

        for datafile in self.container_datafiles:
            log.info("Processing container: %s" % datafile.name)
            container = self.get_container(datafile)
            if container:
                container.deserialize()
                self.yj_containers.append(container)

        for container in self.yj_containers:
            self.fragments.extend(container.get_fragments())

        if self.is_kpf_prepub:
            self.fix_kpf_prepub_book(not pure, retain_yj_locals)

        self.check_consistency()

        if not pure:
            if set_metadata is not None:
                self.set_yj_metadata_to_book(set_metadata)

            if set_approximate_pages is not None and set_approximate_pages >= 0:
                try:
                    self.create_approximate_page_list(set_approximate_pages)
                except Exception as e:
                    traceback.print_exc()
                    log.error(
                        "Exception creating approximate page numbers: %s" % repr(e)
                    )

            try:
                self.report_features_and_metadata(unknown_only=False)
            except Exception as e:
                traceback.print_exc()
                log.error("Exception checking book features and metadata: %s" % repr(e))

            self.check_fragment_usage(rebuild=not pure, ignore_extra=False)
            self.check_symbol_table(rebuild=not pure)

        self.final_actions()

    def locate_book_datafiles(self):
        self.container_datafiles = []

        if self.datafile.is_real_file and os.path.isdir(self.datafile.name):
            self.locate_files_from_dir(self.datafile.name)

        elif self.datafile.ext in [".azw8", ".ion", ".kfx", ".kpf"]:
            self.container_datafiles.append(self.datafile)

            if self.datafile.ext == ".kfx" and self.datafile.is_real_file:
                sdr_dirname = os.path.splitext(self.datafile.name)[0] + ".sdr"
                if os.path.isdir(sdr_dirname):
                    self.locate_files_from_dir(sdr_dirname)

        elif self.datafile.ext in [".kfx-zip", ".zip"]:
            with self.datafile.as_ZipFile() as zf:
                for info in zf.infolist():
                    if posixpath.basename(info.filename) in ["book.ion", "book.kdf"]:
                        self.container_datafiles.append(self.datafile)
                        break
                else:
                    for info in zf.infolist():
                        self.check_located_file(
                            info.filename, zf.read(info), self.datafile
                        )

        else:
            raise Exception(
                "Unknown main file type. Must be azw8, ion, kfx, kfx-zip, kpf, or zip."
            )

        if not self.container_datafiles:
            raise Exception("No KFX containers found. This book is not in KFX format.")

        self.container_datafiles = sorted(self.container_datafiles)

    def locate_files_from_dir(self, directory, match=None):
        for dirpath, dirnames, filenames in os.walk(directory):
            for fn in filenames:
                if (not match) or match == fn:
                    self.check_located_file(os.path.join(dirpath, fn))

    def check_located_file(self, name, data=None, parent=None):
        basename = posixpath.basename(name.replace("\\", "/"))
        ext = os.path.splitext(basename)[1]

        if ext in [".azw", ".azw8", ".azw9", ".kfx", ".md", ".res", ".yj"]:
            self.container_datafiles.append(DataFile(name, data, parent))

    def get_container(self, datafile, ignore_drm=False):
        if datafile.ext == ".ion":
            return IonTextContainer(self.symtab, datafile)

        data = datafile.get_data()

        if data.startswith(ZIP_SIGNATURE):
            with datafile.as_ZipFile() as zf:
                for info in zf.infolist():
                    if posixpath.basename(info.filename) in ["book.ion", "book.kdf"]:
                        if info.filename.endswith(".kdf"):
                            return KpfContainer(self.symtab, datafile, book=self)
                        else:
                            return ZipUnpackContainer(self.symtab, datafile)

        if data.startswith(KpfContainer.KDF_SIGNATURE):
            return KpfContainer(self.symtab, datafile, book=self)

        if data.startswith(KfxContainer.SIGNATURE):
            return KfxContainer(self.symtab, datafile)

        if data.startswith(KfxContainer.DRM_SIGNATURE):
            if ignore_drm:
                return None

            raise KFXDRMError(
                "Book container %s has DRM and cannot be converted" % datafile.name
            )

        if data[0x3C : 0x3C + 8] in [b"BOOKMOBI", b"RBINCONT"]:
            raise Exception("File format is MOBI (not KFX) for %s" % datafile.name)

        raise Exception(
            "Unable to determine KFX container type of %s (%s)"
            % (datafile.name, bytes_to_separated_hex(data[:8]))
        )
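
YJ_Book above is the entry point this repo drives for KFX conversion; a minimal sketch (the path is hypothetical, and convert_to_epub is assumed to return the EPUB as bytes, matching its use of decompile_to_epub above):

from kindle_download_helper.third_party.kfxlib.yj_book import YJ_Book

book = YJ_Book("/path/to/book.kfx")  # hypothetical input file
epub_data = book.convert_to_epub()
with open("book.epub", "wb") as f:
    f.write(epub_data)
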
404
kindle_download_helper/third_party/kfxlib/yj_container.py
vendored
Normal file
@@ -0,0 +1,404 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import collections
import functools

from .ion import IonAnnotation, IonAnnots, IonBLOB, IonList, IonSymbol, ion_type
from .python_transition import IS_PYTHON2
from .utilities import list_symbols, natural_sort_key, type_name

if IS_PYTHON2:
    from .python_transition import repr, str


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


DRMION_SIGNATURE = b"\xeaDRMION\xee"

CONTAINER_FORMAT_KPF = "KPF"
CONTAINER_FORMAT_KFX_MAIN = "KFX main"
CONTAINER_FORMAT_KFX_METADATA = "KFX metadata"
CONTAINER_FORMAT_KFX_ATTACHABLE = "KFX attachable"


RAW_FRAGMENT_TYPES = {"$418", "$417"}


PREFERED_FRAGMENT_TYPE_ORDER = [
    "$ion_symbol_table",
    "$270",
    "$593",
    "$585",
    "$490",
    "$258",
    "$538",
    "$389",
    "$390",
    "$260",
    "$259",
    "$608",
    "$145",
    "$756",
    "$692",
    "$157",
    "$391",
    "$266",
    "$394",
    "$264",
    "$265",
    "$550",
    "$609",
    "$621",
    "$611",
    "$610",
    "$597",
    "$267",
    "$387",
    "$395",
    "$262",
    "$164",
    "$418",
    "$417",
    "$419",
]


ROOT_FRAGMENT_TYPES = {
    "$ion_symbol_table",
    "$270",
    "$490",
    "$389",
    "$419",
    "$585",
    "$538",
    "$262",
    "$593",
    "$550",
    "$258",
    "$265",
    "$264",
    "$395",
    "$390",
    "$621",
    "$611",
}


SINGLETON_FRAGMENT_TYPES = ROOT_FRAGMENT_TYPES - {
    "$270",
    "$262",
    "$593",
}


REQUIRED_BOOK_FRAGMENT_TYPES = {
    "$ion_symbol_table",
    "$270",
    "$490",
    "$389",
    "$419",
    "$538",
    "$550",
    "$258",
    "$265",
    "$264",
    "$611",
}


ALLOWED_BOOK_FRAGMENT_TYPES = {
    "$266",
    "$597",
    "$418",
    "$417",
    "$394",
    "$145",
    "$585",
    "$610",
    "$164",
    "$262",
    "$593",
    "$391",
    "$692",
    "$387",
    "$395",
    "$756",
    "$260",
    "$267",
    "$390",
    "$609",
    "$259",
    "$608",
    "$157",
    "$621",
}


KNOWN_FRAGMENT_TYPES = REQUIRED_BOOK_FRAGMENT_TYPES | ALLOWED_BOOK_FRAGMENT_TYPES


CONTAINER_FRAGMENT_TYPES = [
    "$270",
    "$593",
    "$ion_symbol_table",
    "$419",
]


class YJContainer(object):
    def __init__(self, symtab, datafile=None, fragments=None):
        self.symtab = symtab
        self.datafile = datafile
        self.fragments = YJFragmentList() if fragments is None else fragments

    def get_fragments(self):
        return self.fragments


@functools.total_ordering
class YJFragmentKey(IonAnnots):
    def __new__(cls, arg=None, ftype=None, fid=None, annot=None):
        if arg is not None:
            raise Exception("YJFragmentKey initializer missing keyword")

        if annot is not None:
            return IonAnnots.__new__(cls, tuple(annot))

        if fid is None:
            return IonAnnots.__new__(cls, [IonSymbol(ftype)])

        if ftype is None:
            return IonAnnots.__new__(cls, [IonSymbol(fid)])

        return IonAnnots.__new__(cls, [IonSymbol(fid), IonSymbol(ftype)])

    def sort_key(self):
        return (
            PREFERED_FRAGMENT_TYPE_ORDER.index(self.ftype)
            if self.ftype in PREFERED_FRAGMENT_TYPE_ORDER
            else len(PREFERED_FRAGMENT_TYPE_ORDER),
            natural_sort_key(self.fid),
        )

    def __eq__(self, other):
        if isinstance(other, YJFragment):
            return self == other.annotations

        if isinstance(other, YJFragmentKey):
            return tuple(self) == tuple(other)

        raise Exception("YJFragmentKey __eq__: comparing with %s" % type_name(other))

    def __lt__(self, other):
        if isinstance(other, YJFragment):
            return self < other.annotations

        if isinstance(other, YJFragmentKey):
            return self.sort_key() < other.sort_key()

        raise Exception("YJFragmentKey __lt__: comparing with %s" % type_name(other))

    def __hash__(self):
        return hash(tuple(self))

    @property
    def fid(self):
        return self[0]

    @fid.setter
    def fid(self, value):
        raise Exception("Attempt to modify YJFragmentKey fid")

    @property
    def ftype(self):
        return self[-1]

    @ftype.setter
    def ftype(self, value):
        raise Exception("Attempt to modify YJFragmentKey ftype")


@functools.total_ordering
class YJFragment(IonAnnotation):
    def __init__(self, arg=None, ftype=None, fid=None, value=None):
        if isinstance(arg, YJFragmentKey):
            IonAnnotation.__init__(self, arg, value)
        elif isinstance(arg, IonAnnotation):
            IonAnnotation.__init__(
                self, YJFragmentKey(annot=arg.annotations), arg.value
            )
        else:
            IonAnnotation.__init__(self, YJFragmentKey(ftype=ftype, fid=fid), value)

    def __hash__(self):
        return hash(self.annotations)

    def __eq__(self, other):
        if isinstance(other, YJFragment):
            return self.annotations == other.annotations

        if isinstance(other, YJFragmentKey):
            return self.annotations == other

        raise Exception("YJFragment __eq__: comparing with %s" % type_name(other))

    def __lt__(self, other):
        if isinstance(other, YJFragment):
            return self.annotations < other.annotations

        if isinstance(other, YJFragmentKey):
            return self.annotations < other

        raise Exception("YJFragment __lt__: comparing with %s" % type_name(other))

    @property
    def fid(self):
        return self.annotations[0]

    @fid.setter
    def fid(self, value):
        raise Exception("Attempt to modify YJFragment fid")

    @property
    def ftype(self):
        return self.annotations[-1]

    @ftype.setter
    def ftype(self, value):
        raise Exception("Attempt to modify YJFragment ftype")


class YJFragmentList(IonList):
    def __init__(self, *args):
        IonList.__init__(self, *args)
        self.yj_dirty = True
        self.yj_ftype_index = collections.defaultdict(list)
        self.yj_fragment_index = collections.defaultdict(list)

    def yj_rebuild_index(self):
        self.yj_ftype_index.clear()
        self.yj_fragment_index.clear()

        for f in self:
            if not isinstance(f, YJFragment):
                raise Exception(
                    "YJFragmentList contains non-YJFragment: %s" % type_name(f)
                )

            self.yj_ftype_index[f.ftype].append(f)
            self.yj_fragment_index[f].append(f)

        self.yj_dirty = False

    def get_all(self, ftype=None):
        return self.get(ftype=ftype, all=True)

    def get(self, ftype=None, default=None, fid=None, first=False, all=False):
        key = ftype

        if isinstance(key, int):
            return list.__getitem__(self, key)

        if self.yj_dirty:
            self.yj_rebuild_index()

        if isinstance(key, YJFragmentKey):
            matches = self.yj_fragment_index.get(key, [])
        elif fid is not None:
            key = YJFragmentKey(ftype=ftype, fid=fid)
            matches = self.yj_fragment_index.get(key, [])
        else:
            matches = self.yj_ftype_index.get(ftype, [])

        if all:
            return list(matches)

        if not matches:
            return default

        if len(matches) > 1 and not first:
            raise KeyError(
                "YJFragmentList get has multiple matches for %s: %s"
                % (repr(key), list_symbols(matches))
            )

        return matches[0]

    def __getitem__(self, key):
        fragment = self.get(key)
        if fragment is None:
            raise KeyError("YJFragmentList item is missing: %s" % repr(key))

        return fragment

    def append(self, value):
        if not isinstance(value, YJFragment):
            raise Exception(
                "YJFragmentList append non-YJFragment: %s" % type_name(value)
            )

        IonList.append(self, value)
        self.yj_dirty = True

    def extend(self, values):
        if not isinstance(values, YJFragmentList):
            raise Exception(
                "YJFragmentList extend non-YJFragmentList: %s" % type_name(values)
            )

        IonList.extend(self, values)
        self.yj_dirty = True

    def remove(self, value):
        if not self.discard(value):
            raise KeyError("YJFragmentList remove, item is missing: %s" % str(value))

    def discard(self, value):
        if not isinstance(value, YJFragment):
            raise Exception(
                "YJFragmentList remove non-YJFragment: %s" % type_name(value)
            )

        for i, f in enumerate(self):
            if f is value:
                self.pop(i)
                self.yj_dirty = True
                return True

        return False

    def ftypes(self):
        if self.yj_dirty:
            self.yj_rebuild_index()

        return set(self.yj_ftype_index.keys())

    def filtered(self, omit_resources=False, omit_large_blobs=False):
        if not (omit_resources or omit_large_blobs):
            return self

        filtered_fragments = YJFragmentList()
        for fragment in list(self):
            if fragment.ftype in RAW_FRAGMENT_TYPES:
                if omit_resources:
                    continue

                if (
                    omit_large_blobs
                    and ion_type(fragment.value) is IonBLOB
                    and fragment.value.is_large()
                ):
                    fragment = YJFragment(
                        ftype=fragment.ftype,
                        fid=fragment.fid,
                        value=repr(fragment.value),
                    )

            filtered_fragments.append(fragment)

        return filtered_fragments

    def clear(self):
        del self[:]
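
A short sketch of the YJFragmentList lookup semantics defined above (the fragment value is a placeholder):

fragments = YJFragmentList()
fragments.append(YJFragment(ftype="$490", value={}))

fragments.get("$490")               # the lone $490 fragment, or default=None
fragments.get_all("$417")           # [] when no raw resources are present
fragments.get("$262", first=True)   # tolerates duplicate font fragments
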
896
kindle_download_helper/third_party/kfxlib/yj_metadata.py
vendored
Normal file
@@ -0,0 +1,896 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import io
|
||||
import random
|
||||
import string
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from .ion import IS, IonBLOB, IonStruct, IonSymbol, ion_type, unannotated
|
||||
from .message_logging import log
|
||||
from .python_transition import IS_PYTHON2
|
||||
from .utilities import (
|
||||
convert_pdf_to_jpeg,
|
||||
disable_debug_log,
|
||||
jpeg_type,
|
||||
list_symbols,
|
||||
list_symbols_unsorted,
|
||||
quote_name,
|
||||
)
|
||||
from .yj_container import YJFragment, YJFragmentKey
|
||||
from .yj_structure import (
|
||||
FORMAT_SYMBOLS,
|
||||
KFX_COVER_RESOURCE,
|
||||
METADATA_NAMES,
|
||||
METADATA_SYMBOLS,
|
||||
SYMBOL_FORMATS,
|
||||
)
|
||||
from .yj_versions import (
|
||||
PACKAGE_VERSION_PLACEHOLDERS,
|
||||
is_known_feature,
|
||||
is_known_generator,
|
||||
is_known_metadata,
|
||||
)
|
||||
|
||||
if IS_PYTHON2:
|
||||
from .python_transition import repr, str
|
||||
|
||||
|
||||
__license__ = "GPL v3"
|
||||
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"
|
||||
|
||||
|
||||
class YJ_Metadata(object):
|
||||
def __init__(self, author_sort_fn=None, replace_existing_authors_with_sort=False):
|
||||
self.authors = []
|
||||
self.author_sort_fn = (
|
||||
author_sort_name if author_sort_fn is None else author_sort_fn
|
||||
)
|
||||
self.replace_existing_authors_with_sort = replace_existing_authors_with_sort
|
||||
self.title = (
|
||||
self.cde_content_type
|
||||
) = self.asin = self.cover_image_data = self.description = None
|
||||
self.issue_date = (
|
||||
self.language
|
||||
) = self.publisher = self.book_id = self.features = self.asset_id = None
|
||||
|
||||
|
||||
class BookMetadata(object):
|
||||
def get_yj_metadata_from_book(self):
|
||||
yj_metadata = YJ_Metadata()
|
||||
authors = []
|
||||
|
||||
fragment = self.fragments.get("$490")
|
||||
if fragment is not None:
|
||||
for cm in fragment.value.get("$491", {}):
|
||||
if cm.get("$495", "") == "kindle_title_metadata":
|
||||
for kv in cm.get("$258", []):
|
||||
key = kv.get("$492", "")
|
||||
val = kv.get("$307", "")
|
||||
|
||||
if key == "author":
|
||||
authors.append(val)
|
||||
elif key == "title":
|
||||
yj_metadata.title = val
|
||||
elif key == "cde_content_type":
|
||||
yj_metadata.cde_content_type = val
|
||||
elif key == "ASIN":
|
||||
yj_metadata.asin = val
|
||||
elif key == "description":
|
||||
yj_metadata.description = val
|
||||
elif key == "issue_date":
|
||||
yj_metadata.issue_date = val
|
||||
elif key == "language":
|
||||
yj_metadata.language = val
|
||||
elif key == "publisher":
|
||||
yj_metadata.publisher = val
|
||||
elif key == "book_id":
|
||||
yj_metadata.book_id = val
|
||||
elif key == "asset_id":
|
||||
yj_metadata.asset_id = val
|
||||
|
||||
fragment = self.fragments.get("$258")
|
||||
if fragment is not None:
|
||||
for name, val in fragment.value.items():
|
||||
key = METADATA_NAMES.get(name, "")
|
||||
|
||||
if key == "author" and not authors:
|
||||
if " & " in val:
|
||||
for author in val.split("&"):
|
||||
authors.append(author.strip())
|
||||
elif " and " in val:
|
||||
auths = val.split(" and ")
|
||||
if len(auths) == 2 and "," in auths[0] and "," not in auths[1]:
|
||||
auths = auths[0].split(",") + [auths[1]]
|
||||
for author in auths:
|
||||
authors.append(author.strip())
|
||||
elif val:
|
||||
authors.append(val)
|
||||
|
||||
elif key == "title" and not yj_metadata.title:
|
||||
yj_metadata.title = val
|
||||
elif key == "cde_content_type" and not yj_metadata.cde_content_type:
|
||||
yj_metadata.cde_content_type = val
|
||||
elif key == "ASIN" and not yj_metadata.asin:
|
||||
yj_metadata.asin = val
|
||||
elif key == "description" and not yj_metadata.description:
|
||||
yj_metadata.description = val
|
||||
elif key == "issue_date" and not yj_metadata.issue_date:
|
||||
yj_metadata.issue_date = val
|
||||
elif key == "language" and not yj_metadata.language:
|
||||
yj_metadata.language = val
|
||||
elif key == "publisher" and not yj_metadata.publisher:
|
||||
yj_metadata.publisher = val
|
||||
elif key == "asset_id" and not yj_metadata.asset_id:
|
||||
yj_metadata.asset_id = val
|
||||
|
||||
yj_metadata.authors = []
|
||||
for author in authors:
|
||||
author = unsort_author_name(author)
|
||||
if author and author not in yj_metadata.authors:
|
||||
yj_metadata.authors.append(author)
|
||||
|
||||
cover_image_data = self.get_cover_image_data()
|
||||
if cover_image_data is not None:
|
||||
yj_metadata.cover_image_data = cover_image_data
|
||||
|
||||
yj_metadata.features = self.get_features()
|
||||
|
||||
return yj_metadata
|
||||
|
||||
def set_yj_metadata_to_book(self, yj_metadata):
|
||||
authors = (
|
||||
[yj_metadata.author_sort_fn(author) for author in yj_metadata.authors]
|
||||
if yj_metadata.authors is not None
|
||||
else None
|
||||
)
|
||||
|
||||
if yj_metadata.asin is True:
|
||||
yj_metadata.asin = "".join(
|
||||
random.choice(string.ascii_uppercase + string.digits) for _ in range(32)
|
||||
)
|
||||
|
||||
book_metadata_fragment = self.fragments.get("$490")
|
||||
metadata_fragment = self.fragments.get("$258")
|
||||
|
||||
if book_metadata_fragment is None and metadata_fragment is None:
|
||||
log.error("Cannot set metadata due to missing metadata fragments in book")
|
||||
|
||||
cover_image = None
|
||||
if yj_metadata.cover_image_data is not None:
|
||||
new_cover_image_data = self.fix_cover_image_data(
|
||||
yj_metadata.cover_image_data
|
||||
)
|
||||
if new_cover_image_data != self.get_cover_image_data():
|
||||
cover_image = self.set_cover_image_data(new_cover_image_data)
|
||||
|
||||
if book_metadata_fragment is not None:
|
||||
for cm in book_metadata_fragment.value.get("$491", {}):
|
||||
if cm.get("$495", "") == "kindle_title_metadata":
|
||||
new_ksv = []
|
||||
for kv in cm.get("$258", []):
|
||||
key = kv.get("$492", "")
|
||||
val = kv.get("$307", "")
|
||||
|
||||
if (
|
||||
key == "author"
|
||||
and yj_metadata.replace_existing_authors_with_sort
|
||||
):
|
||||
if authors is None:
|
||||
authors = []
|
||||
|
||||
authors.append(yj_metadata.author_sort_fn(val))
|
||||
|
||||
elif (
|
||||
(key == "author" and authors is not None)
|
||||
or (key == "title" and yj_metadata.title is not None)
|
||||
or (
|
||||
key == "cde_content_type"
|
||||
and yj_metadata.cde_content_type is not None
|
||||
)
|
||||
or (key == "ASIN" and yj_metadata.asin is not None)
|
||||
or (key == "content_id" and yj_metadata.asin is not None)
|
||||
or (key == "cover_image" and cover_image is not None)
|
||||
or (
|
||||
key == "description"
|
||||
and yj_metadata.description is not None
|
||||
)
|
||||
or (
|
||||
key == "issue_date"
|
||||
and yj_metadata.issue_date is not None
|
||||
)
|
||||
or (key == "language" and yj_metadata.language is not None)
|
||||
or (
|
||||
key == "publisher" and yj_metadata.publisher is not None
|
||||
)
|
||||
):
|
||||
pass
|
||||
|
||||
elif key:
|
||||
new_ksv.append((key, len(new_ksv), val))
|
||||
|
||||
if authors is not None:
|
||||
for author in authors:
|
||||
new_ksv.append(("author", len(new_ksv), author))
|
||||
|
||||
if yj_metadata.title is not None:
|
||||
new_ksv.append(("title", len(new_ksv), yj_metadata.title))
|
||||
|
||||
if yj_metadata.cde_content_type is not None:
|
||||
new_ksv.append(
|
||||
(
|
||||
"cde_content_type",
|
||||
len(new_ksv),
|
||||
yj_metadata.cde_content_type,
|
||||
)
|
||||
)
|
||||
|
||||
if yj_metadata.asin is not None:
|
||||
new_ksv.append(("ASIN", len(new_ksv), yj_metadata.asin))
|
||||
new_ksv.append(("content_id", len(new_ksv), yj_metadata.asin))
|
||||
|
||||
if cover_image is not None:
|
||||
new_ksv.append(("cover_image", len(new_ksv), cover_image))
|
||||
|
||||
if yj_metadata.description is not None:
|
||||
new_ksv.append(
|
||||
("description", len(new_ksv), yj_metadata.description)
|
||||
)
|
||||
|
||||
if yj_metadata.issue_date is not None:
|
||||
new_ksv.append(
|
||||
("issue_date", len(new_ksv), yj_metadata.issue_date)
|
||||
)
|
||||
|
||||
if yj_metadata.language is not None:
|
||||
new_ksv.append(("language", len(new_ksv), yj_metadata.language))
|
||||
|
||||
if yj_metadata.publisher is not None:
|
||||
new_ksv.append(
|
||||
("publisher", len(new_ksv), yj_metadata.publisher)
|
||||
)
|
||||
|
||||
cm[IS("$258")] = [
|
||||
IonStruct(IS("$492"), k, IS("$307"), v)
|
||||
for k, s, v in sorted(new_ksv)
|
||||
]
|
||||
|
||||
if metadata_fragment is not None:
|
||||
mdx = metadata_fragment.value
|
||||
|
||||
if not (len(mdx) == 0 or (len(mdx) == 1 and "$169" in mdx)):
|
||||
if authors is not None:
|
||||
mdx[IS("$222")] = " & ".join(authors)
|
||||
else:
|
||||
mdx.pop("$222", None)
|
||||
|
||||
if yj_metadata.title is not None:
|
||||
mdx[IS("$153")] = yj_metadata.title
|
||||
else:
|
||||
mdx.pop("$153", None)
|
||||
|
||||
if yj_metadata.cde_content_type is not None:
|
||||
mdx[IS("$251")] = yj_metadata.cde_content_type
|
||||
else:
|
||||
mdx.pop("$251", None)
|
||||
|
||||
if yj_metadata.asin is not None:
|
||||
mdx[IS("$224")] = yj_metadata.asin
|
||||
else:
|
||||
mdx.pop("$224", None)
|
||||
|
||||
if cover_image is not None:
|
||||
mdx[IS("$424")] = IS(cover_image)
|
||||
else:
|
||||
mdx.pop("$424", None)
|
||||
|
||||
if yj_metadata.description is not None:
|
||||
mdx[IS("$154")] = yj_metadata.description
|
||||
else:
|
||||
mdx.pop("$154", None)
|
||||
|
||||
if yj_metadata.issue_date is not None:
|
||||
mdx[IS("$219")] = yj_metadata.issue_date
|
||||
else:
|
||||
mdx.pop("$219", None)
|
||||
|
||||
if yj_metadata.language is not None:
|
||||
mdx[IS("$10")] = yj_metadata.language
|
||||
else:
|
||||
mdx.pop("$10", None)
|
||||
|
||||
if yj_metadata.publisher is not None:
|
||||
mdx[IS("$232")] = yj_metadata.publisher
|
||||
else:
|
||||
mdx.pop("$232", None)
|
||||
|
||||
    def has_metadata(self):
        return (
            self.fragments.get(YJFragmentKey(ftype="$490")) is not None
            or self.fragments.get(YJFragmentKey(ftype="$258")) is not None
        )

    def has_cover_data(self):
        return self.get_cover_image_data() is not None

    def get_asset_id(self):
        return self.get_metadata_value("asset_id")

    @property
    def cde_type(self):
        if not hasattr(self, "_cached_cde_type"):
            self._cached_cde_type = self.get_metadata_value("cde_content_type")

        return self._cached_cde_type

    @property
    def is_magazine(self):
        return self.cde_type == "MAGZ"

    @property
    def is_sample(self):
        return self.cde_type == "EBSP"

    @property
    def is_print_replica(self):
        if not hasattr(self, "_cached_is_print_replica"):
            self._cached_is_print_replica = (
                self.get_metadata_value(
                    "yj_textbook", category="kindle_capability_metadata"
                )
                is not None
            )

        return self._cached_is_print_replica

    @property
    def is_fixed_layout(self):
        if not hasattr(self, "_cached_is_fixed_layout"):
            self._cached_is_fixed_layout = (
                self.get_metadata_value("yj_fixed_layout", "kindle_capability_metadata")
                is not None
            )

        return self._cached_is_fixed_layout

    @property
    def is_illustrated_layout(self):
        if not hasattr(self, "_cached_is_illustrated_layout"):
            self._cached_is_illustrated_layout = (
                self.get_feature_value("yj.illustrated_layout") is not None
            )

        return self._cached_is_illustrated_layout

    @property
    def is_conditional_structure(self):
        if not hasattr(self, "_cached_is_conditional_structure"):
            self._cached_is_conditional_structure = self.get_feature_value(
                "yj.conditional_structure"
            ) is not None or (
                self.get_feature_value("reflow-style", default=0) == 5
                and not self.is_magazine
            )

        return self._cached_is_conditional_structure

    @property
    def is_kfx_v1(self):
        if not hasattr(self, "_cached_is_kfx_v1"):
            fragment = self.fragments.get("$270", first=True)
            self._cached_is_kfx_v1 = (
                fragment.value.get("version", 0) == 1 if fragment is not None else False
            )

        return self._cached_is_kfx_v1

    @property
    def has_pdf_resource(self):
        if not hasattr(self, "_cached_has_pdf_resource"):
            for fragment in self.fragments.get_all("$164"):
                if fragment.value.get("$161") == "$565":
                    self._cached_has_pdf_resource = True
                    break
            else:
                self._cached_has_pdf_resource = False

        return self._cached_has_pdf_resource

    def get_metadata_value(self, name, category="kindle_title_metadata", default=None):
        try:
            fragment = self.fragments.get("$490")
            if fragment is not None:
                for cm in fragment.value["$491"]:
                    if cm["$495"] == category:
                        for kv in cm["$258"]:
                            if kv["$492"] == name:
                                return kv["$307"]

            metadata_symbol = METADATA_SYMBOLS.get(name)
            if metadata_symbol is not None:
                fragment = self.fragments.get("$258")
                if fragment is not None and metadata_symbol in fragment.value:
                    return fragment.value[metadata_symbol]
        except Exception:
            pass

        return default
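    # Usage sketch (hypothetical book instance; values depend on the KFX file):
    #   book.get_metadata_value("title")        -> "A Book Title" or None
    #   book.get_metadata_value("cover_image")  -> resource name consumed by
    #                                              get_cover_image_data() below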
    def get_feature_value(
        self, feature, namespace="com.amazon.yjconversion", default=None
    ):
        if namespace == "format_capabilities":
            fragment = self.fragments.get("$593", first=True)
            if fragment is not None:
                for fc in fragment.value:
                    if fc.get("$492", "") == feature:
                        return fc.get("version", "")
        else:
            fragment = self.fragments.get("$585", first=True)
            if fragment is not None:
                for cf in fragment.value.get("$590", []):
                    if (
                        cf.get("$586", "") == namespace
                        and cf.get("$492", "") == feature
                    ):
                        vi = cf.get("$589", {}).get("version", {})
                        major_version = vi.get("$587", 0)
                        minor_version = vi.get("$588", 0)
                        return (
                            major_version
                            if minor_version == 0
                            else (major_version, minor_version)
                        )

        return default
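    # Note: feature versions come back as a bare major version (e.g. 1) when the
    # minor version is 0, otherwise as a (major, minor) tuple (e.g. (1, 2)).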
    def get_generators(self):
        generators = set()

        for fragment in self.fragments.get_all("$270"):
            if "version" in fragment.value:
                package_version = fragment.value.get("$588", "")
                generators.add(
                    (
                        fragment.value.get("$587", ""),
                        package_version
                        if package_version not in PACKAGE_VERSION_PLACEHOLDERS
                        else "",
                    )
                )

        return generators

    def get_features(self):
        features = set()

        features.add(("symbols", "max_id", self.symtab.local_min_id - 1))

        for fragment in self.fragments.get_all("$593"):
            for fc in fragment.value:
                features.add(
                    ("format_capabilities", fc.get("$492", ""), fc.get("version", ""))
                )

        fragment = self.fragments.get("$585", first=True)
        if fragment is not None:
            for cf in fragment.value.get("$590", []):
                vi = cf.get("$589", {}).get("version", {})
                major_version = vi.get("$587", 0)
                minor_version = vi.get("$588", 0)
                features.add(
                    (
                        cf.get("$586", ""),
                        cf.get("$492", ""),
                        major_version
                        if minor_version == 0
                        else (major_version, minor_version),
                    )
                )

        return features
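    # get_features() yields (namespace, key, version) triples, e.g. (hypothetical):
    #   ("com.amazon.yjconversion", "reflow-style", 5)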
    def report_features_and_metadata(self, unknown_only=False):
        report_generators = set()
        for generator in sorted(self.get_generators()):
            generator_version = ("%s/%s" % generator) if generator[1] else generator[0]
            if not is_known_generator(generator[0], generator[1]):
                log.warning("Unknown kfxgen: %s" % generator_version)
            elif not unknown_only:
                report_generators.add(generator_version)

        if report_generators:
            log.info("kfxgen version: %s" % list_symbols(report_generators))

        report_features = set()
        for namespace, key, value in sorted(self.get_features()):
            value_str = (
                quote_name(value)
                if isinstance(value, str)
                else (
                    ".".join([str(v) for v in value])
                    if isinstance(value, tuple)
                    else str(value)
                )
            )
            if is_known_feature(namespace, key, value):
                if not unknown_only:
                    report_features.add("%s-%s" % (key, value_str))
            elif namespace == "symbols":
                log.warning("Unknown %s feature: %s-%s" % (namespace, key, value_str))
            else:
                log.error("Unknown %s feature: %s-%s" % (namespace, key, value_str))

        if report_features:
            log.info("Features: %s" % list_symbols(report_features))

        metadata = []
        fragment = self.fragments.get("$490", first=True)
        if fragment is not None:
            for cm in fragment.value.get("$491", {}):
                category = cm.get("$495", "")
                for kv in cm.get("$258", []):
                    metadata.append(
                        (kv.get("$492", ""), category, len(metadata), kv.get("$307", ""))
                    )

        fragment = self.fragments.get("$258", first=True)
        if fragment is not None:
            for name, val in fragment.value.items():
                name = METADATA_NAMES.get(name, name.tostring())
                if name == "reading_orders":
                    val = len(val)
                metadata.append((name, "metadata", len(metadata), val))

        fragment = self.fragments.get("$389")
        if fragment is not None:
            for book_navigation in fragment.value:
                for nav_container in book_navigation.get("$392", []):
                    if ion_type(nav_container) is IonSymbol:
                        nav_container = self.fragments.get(
                            ftype="$391", fid=nav_container
                        )

                    if nav_container is not None:
                        nav_container = unannotated(nav_container)
                        if nav_container.get("$235", None) == "$237":
                            num_pages = len(nav_container.get("$247", []))
                            if num_pages:
                                metadata.append(
                                    ("pages", "book_navigation", len(metadata), num_pages)
                                )

        report_metadata = []
        for key, cat, seq, val in sorted(metadata):
            if not is_known_metadata(cat, key, val):
                log.warning("Unknown %s: %s=%s" % (cat, key, str(val)))
            elif not unknown_only:
                if key == "cover_image":
                    try:
                        cover_resource = self.fragments[
                            YJFragmentKey(ftype="$164", fid=val)
                        ].value

                        cover_raw_data = None
                        if "$165" in cover_resource:
                            cover_raw_media = self.fragments.get(
                                ftype="$417", fid=cover_resource["$165"]
                            )
                            if cover_raw_media is not None:
                                cover_raw_data = cover_raw_media.value.tobytes()

                        resource_height = cover_resource.get("$423", 0)
                        resource_width = cover_resource.get("$422", 0)

                        if (
                            not (resource_width and resource_height)
                        ) and cover_raw_data is not None:
                            with disable_debug_log():
                                cover = Image.open(io.BytesIO(cover_raw_data))
                                resource_width, resource_height = cover.size
                                cover.close()

                        val = "%dx%d" % (resource_width, resource_height)

                        cover_format = SYMBOL_FORMATS.get(
                            cover_resource["$161"], "unknown"
                        )

                        if cover_raw_data is not None:
                            cover_format = jpeg_type(cover_raw_data, cover_format)

                        if cover_format != "JPEG":
                            val += "-" + cover_format

                    except Exception:
                        val = "???"

                elif key == "dictionary_lookup":
                    val = "%s-to-%s" % (val.get("$474", "?"), val.get("$163", "?"))

                elif key == "description" and len(val) > 20:
                    val = "..."

                meta_str = "%s=%s" % (key, quote_name(str(val)))

                if meta_str not in report_metadata:
                    report_metadata.append(meta_str)

        if report_metadata:
            log.info("Metadata: %s" % list_symbols_unsorted(report_metadata))
    def get_cover_image_data(self):
        cover_image_resource = self.get_metadata_value("cover_image")
        if not cover_image_resource:
            return None

        cover_resource = self.fragments.get(ftype="$164", fid=cover_image_resource)
        if cover_resource is None:
            return None

        cover_fmt = cover_resource.value["$161"]
        if ion_type(cover_fmt) is IonSymbol:
            cover_fmt = SYMBOL_FORMATS[cover_fmt]

        cover_raw_media = self.fragments.get(
            ftype="$417", fid=cover_resource.value["$165"]
        )
        if cover_raw_media is None:
            return None

        return (
            "jpeg" if cover_fmt == "jpg" else cover_fmt,
            cover_raw_media.value.tobytes(),
        )
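    # Returns a (format, bytes) pair such as ("jpeg", b"\xff\xd8...") or None when
    # the book carries no usable cover resource.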
    def fix_cover_image_data(self, cover_image_data):
        fmt = cover_image_data[0]
        data = orig_data = cover_image_data[1]

        if fmt.lower() in ["jpg", "jpeg"] and not data.startswith(b"\xff\xd8\xff\xe0"):
            try:
                with disable_debug_log():
                    cover = Image.open(io.BytesIO(data))
                    outfile = io.BytesIO()
                    cover.save(outfile, "jpeg", quality=90)
                    cover.close()

                data = outfile.getvalue()
            except Exception:
                data = orig_data

            if data.startswith(b"\xff\xd8\xff\xe0"):
                log.info(
                    "Changed cover image from %s to JPEG/JFIF for Kindle lockscreen"
                    % jpeg_type(orig_data)
                )
            else:
                log.error(
                    "Failed to change cover image from %s to JPEG/JFIF"
                    % jpeg_type(orig_data)
                )
                data = orig_data

        return (fmt, data)
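    # Background for the byte check above: a JFIF file begins with the JPEG SOI
    # marker FF D8 followed by an APP0 marker FF E0, i.e. b"\xff\xd8\xff\xe0".
    # Re-saving the cover with Pillow's save(..., "jpeg") produces this JFIF
    # header, which is what the Kindle lockscreen handling here expects.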
    def set_cover_image_data(self, cover_image_data, update_cover_section=True):
        fmt = cover_image_data[0].lower()
        if fmt == "jpeg":
            fmt = "jpg"

        if fmt != "jpg":
            raise Exception(
                "Cannot set KFX cover image format to %s, must be JPEG" % fmt.upper()
            )

        cover_image = self.get_metadata_value("cover_image")
        if cover_image is None:
            cover_image = KFX_COVER_RESOURCE
            cover_image_symbol = self.create_local_symbol(cover_image)
            self.fragments.append(
                YJFragment(
                    ftype="$164",
                    fid=cover_image_symbol,
                    value=IonStruct(IS("$175"), cover_image_symbol),
                )
            )

        data = cover_image_data[1]
        cover_resource = self.update_image_resource_and_media(
            cover_image, data, fmt, update_cover_section
        )

        if "$214" in cover_resource:
            with disable_debug_log():
                cover_thumbnail = Image.open(io.BytesIO(data))
                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
                cover_thumbnail.thumbnail((512, 512), Image.LANCZOS)
                outfile = io.BytesIO()
                cover_thumbnail.save(
                    outfile, "jpeg" if fmt == "jpg" else fmt, quality=90
                )
                cover_thumbnail.close()

            thumbnail_data = outfile.getvalue()

            thumbnail_resource = unannotated(cover_resource["$214"])
            self.update_image_resource_and_media(
                str(thumbnail_resource), thumbnail_data, fmt
            )

        return cover_image
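    # Usage sketch (hypothetical): book.set_cover_image_data(("jpeg", jpeg_bytes))
    # replaces both the cover resource and, when the resource carries a $214
    # reference, its 512px thumbnail.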
    def update_image_resource_and_media(
        self, resource_name, data, fmt, update_cover_section=False
    ):
        cover_resource = self.fragments.get(ftype="$164", fid=resource_name).value

        cover_resource[IS("$161")] = IS(FORMAT_SYMBOLS[fmt])
        cover_resource[IS("$162")] = "image/" + fmt

        cover_resource.pop("$56", None)
        cover_resource.pop("$57", None)
        cover_resource.pop("$66", None)
        cover_resource.pop("$67", None)

        cover = Image.open(io.BytesIO(data))
        width, height = cover.size
        cover.close()

        orig_width = cover_resource.get("$422", 0)
        orig_height = cover_resource.get("$423", 0)

        cover_resource[IS("$422")] = width
        cover_resource[IS("$423")] = height

        if "$165" in cover_resource:
            self.fragments[
                YJFragmentKey(ftype="$417", fid=cover_resource["$165"])
            ].value = IonBLOB(data)
        else:
            location = "%s.%s" % (resource_name, fmt)
            cover_resource[IS("$165")] = location
            self.fragments.append(
                YJFragment(
                    ftype="$417",
                    fid=self.create_local_symbol(location),
                    value=IonBLOB(data),
                )
            )

        if update_cover_section and (width != orig_width or height != orig_height):
            section_updated = False
            if self.locate_cover_image_resource_from_content() == resource_name:
                section_names = self.ordered_section_names()
                if len(section_names) > 0:
                    cover_section = self.fragments.get(
                        ftype="$260", fid=section_names[0]
                    ).value
                    page_templates = cover_section["$141"]
                    page_template = (
                        page_templates[0] if len(page_templates) == 1 else {}
                    )
                    if (
                        page_template.get("$159") == "$270"
                        and page_template.get("$156") == "$326"
                        and page_template.get("$140") == "$320"
                        and page_template.get("$66", -1) == orig_width
                        and page_template.get("$67", -1) == orig_height
                    ):
                        page_template[IS("$66")] = width
                        page_template[IS("$67")] = height
                        section_updated = True

            if not section_updated:
                log.info("First page image dimensions were not updated")

        return cover_resource
    def locate_cover_image_resource_from_content(self, replace_pdf=False):
        section_names = self.ordered_section_names()
        if not section_names:
            return None

        cover_section = self.fragments.get(ftype="$260", fid=section_names[0]).value
        for page_template in cover_section["$141"]:
            story_name = page_template.get("$176")
            if story_name:
                break
        else:
            return None

        cover_story = self.fragments.get(ftype="$259", fid=story_name).value

        def scan_content_for_image(content):
            if content.get("$159") == "$271" and "$175" in content:
                return content["$175"]

            for subcontent in content.get("$146", {}):
                img = scan_content_for_image(subcontent)
                if img is not None:
                    return img

            return None

        resource_name = scan_content_for_image(cover_story)
        if resource_name is None:
            return None

        cover_resource = self.fragments.get(ftype="$164", fid=resource_name).value
        if cover_resource[IS("$161")] != "$565":
            return resource_name

        if not replace_pdf:
            return None

        location = cover_resource["$165"]
        raw_media = self.fragments[YJFragmentKey(ftype="$417", fid=location)].value
        page_num = cover_resource.get("$564", 0) + 1

        try:
            jpeg_data = convert_pdf_to_jpeg(raw_media, page_num)
        except Exception as e:
            log.error(
                "Exception during conversion of PDF '%s' page %d to JPEG: %s"
                % (location, page_num, repr(e))
            )
            return None

        return self.set_cover_image_data(
            ("jpeg", jpeg_data), update_cover_section=False
        )
def author_sort_name(author):
    PERSON_SUFFIXES = {
        "phd", "md", "ba", "ma", "dds", "msts",
        "sr", "senior", "jr", "junior", "ii", "iii", "iv",
    }

    al = author.split()

    if len(al) < 2:
        return author

    if len(al) > 2 and al[-1].replace(".", "").lower() in PERSON_SUFFIXES:
        if al[-2].endswith(","):
            al[-2] = al[-2][:-1]

        al = al[0:-2] + ["%s %s" % (al[-2], al[-1])]

    if "," in "".join(al):
        return author

    return al[-1] + ", " + " ".join(al[:-1])


def unsort_author_name(author):
    if ", " in author:
        last, sep, first = author.partition(", ")
        author = first + " " + last

    return author
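# Worked examples (hypothetical names, not from the library's test data):
#   author_sort_name("Jane Q. Public")          -> "Public, Jane Q."
#   author_sort_name("Martin Luther King, Jr.") -> "King Jr., Martin Luther"
#   author_sort_name("Plato")                   -> "Plato"  (single word, unchanged)
#   unsort_author_name("Public, Jane Q.")       -> "Jane Q. Public"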
1541
kindle_download_helper/third_party/kfxlib/yj_position_location.py
vendored
Normal file
File diff suppressed because it is too large

1577
kindle_download_helper/third_party/kfxlib/yj_structure.py
vendored
Normal file
File diff suppressed because it is too large

852
kindle_download_helper/third_party/kfxlib/yj_symbol_catalog.py
vendored
Normal file
@@ -0,0 +1,852 @@
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


class IonSharedSymbolTable(object):
    def __init__(self, name, version=1, symbols=[]):
        self.name = name
        self.version = version
        self.symbols = symbols


SYSTEM_SYMBOL_TABLE = IonSharedSymbolTable(
    name="$ion",
    version=1,
    symbols=[
        "$ion",
        "$ion_1_0",
        "$ion_symbol_table",
        "name",
        "version",
        "imports",
        "symbols",
        "max_id",
        "$ion_shared_symbol_table",
    ],
)
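# The Ion system table above occupies symbol ids 1-9, so the first entry of
# YJ_symbols below ("$10") lands at id 10; the placeholder names appear to be
# chosen so that each matches its numeric symbol id (an observation from this
# catalog, not a documented guarantee).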
YJ_SYMBOLS = IonSharedSymbolTable(
    name="YJ_symbols",
    version=10,
    symbols=[
        "$10", "$11", "$12", "$13", "$14?", "$15", "$16", "$17?", "$18?", "$19",
        "$20?", "$21", "$22?", "$23", "$24", "$25?", "$26?", "$27", "$28?", "$29?",
        "$30?", "$31", "$32", "$33", "$34", "$35", "$36", "$37?", "$38?", "$39?",
        "$40?", "$41", "$42", "$43?", "$44", "$45", "$46", "$47", "$48", "$49",
        "$50", "$51", "$52", "$53", "$54", "$55", "$56", "$57", "$58", "$59",
        "$60", "$61", "$62", "$63", "$64", "$65", "$66", "$67", "$68", "$69",
        "$70", "$71?", "$72", "$73", "$74?", "$75", "$76", "$77", "$78?", "$79?",
        "$80?", "$81?", "$82?", "$83", "$84", "$85", "$86", "$87", "$88", "$89",
        "$90", "$91", "$92", "$93", "$94", "$95", "$96", "$97", "$98", "$99",
        "$100", "$101?", "$102", "$103?", "$104", "$105", "$106", "$107", "$108", "$109?",
        "$110?", "$111?", "$112", "$113?", "$114?", "$115?", "$116?", "$117?", "$118", "$119?",
        "$120?", "$121?", "$122?", "$123?", "$124?", "$125", "$126", "$127", "$128?", "$129?",
        "$130?", "$131", "$132", "$133", "$134", "$135", "$136", "$137", "$138?", "$139?",
        "$140", "$141", "$142", "$143", "$144", "$145", "$146", "$147?", "$148", "$149",
        "$150", "$151", "$152", "$153", "$154", "$155", "$156", "$157", "$158?", "$159",
        "$160?", "$161", "$162", "$163", "$164", "$165", "$166", "$167", "$168?", "$169",
        "$170", "$171", "$172?", "$173", "$174", "$175", "$176", "$177?", "$178", "$179",
        "$180", "$181", "$182", "$183", "$184", "$185", "$186", "$187?", "$188?", "$189?",
        "$190?", "$191?", "$192", "$193?", "$194?", "$195?", "$196?", "$197?", "$198?", "$199",
        "$200", "$201", "$202", "$203", "$204?", "$205", "$206", "$207", "$208", "$209",
        "$210", "$211", "$212", "$213", "$214", "$215", "$216", "$217", "$218", "$219",
        "$220", "$221?", "$222", "$223?", "$224", "$225?", "$226?", "$227?", "$228?", "$229?",
        "$230", "$231", "$232", "$233", "$234?", "$235", "$236", "$237", "$238", "$239",
        "$240", "$241", "$242?", "$243?", "$244", "$245", "$246", "$247", "$248", "$249",
        "$250", "$251", "$252", "$253", "$254", "$255", "$256?", "$257?", "$258", "$259",
        "$260", "$261?", "$262", "$263?", "$264", "$265", "$266", "$267", "$268?", "$269",
        "$270", "$271", "$272", "$273", "$274", "$275?", "$276", "$277", "$278", "$279",
        "$280?", "$281", "$282", "$283", "$284", "$285", "$286", "$287", "$288?", "$289?",
        "$290?", "$291?", "$292", "$293", "$294", "$295?", "$296", "$297?", "$298", "$299",
        "$300?", "$301?", "$302?", "$303?", "$304", "$305", "$306", "$307", "$308", "$309?",
        "$310", "$311", "$312", "$313?", "$314", "$315?", "$316?", "$317?", "$318", "$319",
        "$320", "$321", "$322", "$323", "$324", "$325", "$326", "$327?", "$328", "$329",
        "$330", "$331", "$332?", "$333?", "$334", "$335", "$336", "$337", "$338?", "$339?",
        "$340", "$341", "$342", "$343", "$344", "$345", "$346", "$347", "$348", "$349",
        "$350", "$351", "$352", "$353", "$354?", "$355", "$356", "$357", "$358?", "$359",
        "$360", "$361", "$362", "$363", "$364?", "$365?", "$366?", "$367?", "$368?", "$369",
        "$370", "$371", "$372", "$373", "$374", "$375", "$376", "$377", "$378", "$379",
        "$380?", "$381", "$382", "$383", "$384", "$385", "$386", "$387", "$388?", "$389",
        "$390", "$391", "$392", "$393", "$394", "$395", "$396", "$397?", "$398?", "$399?",
        "$400?", "$401?", "$402?", "$403", "$404?", "$405?", "$406?", "$407?", "$408?", "$409",
        "$410", "$411", "$412", "$413", "$414", "$415", "$416", "$417", "$418", "$419",
        "$420?", "$421", "$422", "$423", "$424", "$425?", "$426", "$427", "$428", "$429",
        "$430?", "$431?", "$432", "$433", "$434", "$435?", "$436", "$437", "$438", "$439",
        "$440?", "$441", "$442", "$443?", "$444?", "$445?", "$446?", "$447", "$448?", "$449",
        "$450?", "$451?", "$452?", "$453", "$454", "$455", "$456", "$457", "$458?", "$459",
        "$460", "$461", "$462", "$463?", "$464", "$465", "$466", "$467?", "$468", "$469?",
        "$470?", "$471?", "$472", "$473?", "$474", "$475", "$476", "$477", "$478", "$479",
        "$480", "$481", "$482", "$483", "$484", "$485", "$486", "$487", "$488", "$489",
        "$490", "$491", "$492", "$493?", "$494?", "$495", "$496", "$497", "$498", "$499",
        "$500", "$501", "$502", "$503", "$504?", "$505", "$506?", "$507?", "$508?", "$509",
        "$510?", "$511?", "$512?", "$513?", "$514?", "$515?", "$516?", "$517?", "$518?", "$519?",
        "$520?", "$521?", "$522?", "$523?", "$524?", "$525", "$526", "$527?", "$528", "$529?",
        "$530?", "$531?", "$532?", "$533?", "$534?", "$535?", "$536?", "$537?", "$538", "$539?",
        "$540?", "$541?", "$542?", "$543?", "$544?", "$545?", "$546", "$547", "$548", "$549",
        "$550", "$551", "$552", "$553", "$554", "$555?", "$556?", "$557", "$558", "$559",
        "$560", "$561?", "$562?", "$563?", "$564", "$565", "$566?", "$567?", "$568?", "$569",
        "$570", "$571?", "$572?", "$573", "$574?", "$575?", "$576", "$577", "$578?", "$579?",
        "$580", "$581", "$582?", "$583", "$584", "$585", "$586", "$587", "$588", "$589",
        "$590", "$591", "$592", "$593", "$594", "$595", "$596", "$597", "$598", "$599?",
        "$600?", "$601", "$602", "$603?", "$604", "$605", "$606", "$607?", "$608", "$609",
        "$610", "$611", "$612?", "$613", "$614", "$615", "$616", "$617", "$618", "$619",
        "$620?", "$621", "$622", "$623", "$624?", "$625", "$626?", "$627?", "$628", "$629",
        "$630", "$631?", "$632", "$633", "$634?", "$635", "$636", "$637", "$638", "$639",
        "$640", "$641", "$642", "$643", "$644", "$645", "$646", "$647", "$648", "$649",
        "$650", "$651?", "$652", "$653?", "$654?", "$655", "$656", "$657", "$658", "$659",
        "$660", "$661?", "$662?", "$663", "$664", "$665", "$666", "$667?", "$668", "$669?",
        "$670?", "$671", "$672", "$673", "$674", "$675", "$676", "$677", "$678", "$679",
        "$680", "$681", "$682", "$683", "$684", "$685?", "$686", "$687", "$688", "$689",
        "$690", "$691?", "$692", "$693", "$694?", "$695?", "$696", "$697", "$698", "$699?",
        "$700", "$701", "$702", "$703", "$704", "$705", "$706", "$707", "$708", "$709?",
        "$710?", "$711?", "$712?", "$713?", "$714?", "$715?", "$716?", "$717", "$718", "$719",
        "$720", "$721?", "$722?", "$723?", "$724", "$725", "$726", "$727", "$728", "$729",
        "$730", "$731", "$732", "$733", "$734", "$735", "$736", "$737?", "$738?", "$739?",
        "$740?", "$741", "$742", "$743?", "$744?", "$745?", "$746?", "$747?", "$748?", "$749",
        "$750", "$751", "$752", "$753", "$754", "$755", "$756", "$757", "$758", "$759",
        "$760", "$761", "$762", "$763", "$764?", "$765", "$766", "$767?", "$768?", "$769?",
        "$770?", "$771?", "$772?", "$773", "$774", "$775?", "$776?", "$777?", "$778", "$779",
        "$780", "$781", "$782", "$783", "$784", "$785", "$786", "$787?", "$788", "$789",
        "$790", "$791", "$792", "$793", "$794", "$795", "$796", "$797", "$798?", "$799?",
        "$800?", "$801?", "$802?", "$803?", "$804?", "$805?", "$806?", "$807?", "$808?", "$809?",
        "$810?", "$811?", "$812?", "$813?", "$814?", "$815?", "$816?", "$817?", "$818?", "$819?",
        "$820?", "$821", "$822", "$823", "$824", "$825",
    ],
)
457
kindle_download_helper/third_party/kfxlib/yj_to_epub.py
vendored
Normal file
@@ -0,0 +1,457 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import collections
import copy
import decimal
import re

from .epub_output import EPUB_Output
from .ion import (
    IonAnnotation,
    IonList,
    IonSExp,
    IonString,
    IonStruct,
    IonSymbol,
    ion_type,
)
from .message_logging import log
from .python_transition import IS_PYTHON2
from .utilities import UUID_MATCH_RE, check_empty, list_symbols, truncate_list
from .yj_structure import SYM_TYPE
from .yj_to_epub_content import KFX_EPUB_Content
from .yj_to_epub_metadata import KFX_EPUB_Metadata
from .yj_to_epub_misc import KFX_EPUB_Misc
from .yj_to_epub_navigation import KFX_EPUB_Navigation
from .yj_to_epub_properties import GENERIC_FONT_NAMES, KFX_EPUB_Properties
from .yj_to_epub_resources import KFX_EPUB_Resources

if IS_PYTHON2:
    from .python_transition import str


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


REPORT_MISSING_FONTS = True

RETAIN_USED_FRAGMENTS = False
RETAIN_UNUSED_RESOURCES = False


FRAGMENT_NAME_SYMBOL = {
    "$266": "$180",
    "$164": "$175",
    "$391": "$239",
    "$393": "$240",
    "$260": "$174",
    "$608": "$598",
    "$259": "$176",
    "$157": "$173",
}
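# FRAGMENT_NAME_SYMBOL maps a fragment-type symbol to the field that carries that
# fragment's own name, e.g. a "$164" (external resource) fragment is named by its
# "$175" field. get_named_fragment() below pops that field to chase the reference.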
class KFX_EPUB(
    KFX_EPUB_Content,
    KFX_EPUB_Metadata,
    KFX_EPUB_Misc,
    KFX_EPUB_Navigation,
    KFX_EPUB_Properties,
    KFX_EPUB_Resources,
):
    DEBUG = False

    def __init__(self, book, epub2_desired=False):
        self.book = book
        self.book_symbols = set()
        self.book_data = self.organize_fragments_by_type(book.fragments)
        self.is_kpf = book.kpf_container is not None
        self.used_fragments = {}
        self.epub = EPUB_Output(epub2_desired)
        self.epub.get_anchor_uri = self.get_anchor_uri

        self.determine_book_symbol_format()

        decimal.getcontext().prec = 6

        self.page_label_anchor_id = {}
        self.reported_duplicate_page_label = set()
        self.reported_pdf_errors = set()
        self.used_kfx_styles = set()
        self.missing_kfx_styles = set()
        self.css_rules = {}
        self.css_files = set()
        self.missing_special_classes = set()
        self.media_queries = collections.defaultdict(dict)
        self.font_names = set()
        self.missing_font_names = set()
        self.font_name_replacements = {}
        self.font_faces = []
        self.location_filenames = {}
        self.reported_characters = set()
        self.text_combine_in_use = False
        self.incorrect_font_quoting = set()

        for name in GENERIC_FONT_NAMES:
            self.fix_font_name(name, add=True, generic=True)

        self.nav_container_section = {}
        self.navto_anchor = {}
        self.toc_entry_count = 0
        self.anchor_uri = {}
        self.anchor_elem = {}
        self.anchor_id = {}
        self.anchor_ids = set()
        self.position_anchors = {}
        self.anchor_positions = {}
        self.used_anchors = set()
        self.immovable_anchors = set()
        self.page_anchor_id_label = {}
        self.fix_condition_href = False
        self.has_conditional_content = False
        self.context_ = []
        self.save_resources = True

        self.cde_content_type = ""

        self.resource_cache = {}
        self.used_raw_media = set()

        self.process_fonts()
        self.process_document_data()
        self.process_content_features()
        self.process_metadata()

        if self.epub.illustrated_layout:
            raise Exception("Illustrated layout (Kindle in Motion) is not supported.")

        self.set_condition_operators()

        self.process_anchors()
        self.process_navigation()

        for style_name, yj_properties in self.book_data.get("$157", {}).items():
            self.check_fragment_name(yj_properties, "$157", style_name, delete=False)

        self.process_reading_order()

        if self.cover_resource and not self.epub.html_cover:
            try:
                self.process_external_resource(
                    self.cover_resource
                ).manifest_entry.is_cover_image = True
            except Exception:
                log.error("Failed to process cover resource: %s" % self.cover_resource)

        self.fixup_anchors_and_hrefs()
        self.update_default_font_and_language()
        self.set_html_defaults()
        self.fixup_styles_and_classes()
        self.create_css_files()
        self.prepare_book_parts()

        if self.position_anchors:
            pos = []
            for id in self.position_anchors:
                for offset in self.position_anchors[id]:
                    pos.append("%s.%s" % (id, offset))

            log.error(
                "Failed to locate %d referenced positions: %s"
                % (len(pos), ", ".join(truncate_list(sorted(pos))))
            )

        if RETAIN_UNUSED_RESOURCES:
            for external_resource in self.book_data.get("$164", {}):
                self.process_external_resource(external_resource)

        self.check_empty(self.book_data.pop("$164", {}), "external_resource")

        self.report_duplicate_anchors()

        raw_media = self.book_data.pop("$417", {})
        for used_raw_media in self.used_raw_media:
            raw_media.pop(used_raw_media)

        self.check_empty(raw_media, "raw_media")
        self.check_empty(self.book_data.pop("$260", {}), "$260")

        storyline = self.book_data.pop("$259", {})
        if not self.book.is_kpf_prepub:
            self.check_empty(storyline, "$259")

        kfx_styles = self.book_data.pop("$157", {})
        for used_kfx_style in self.used_kfx_styles:
            kfx_styles.pop(used_kfx_style)

        self.check_empty(kfx_styles, "kfx styles")

        self.book_data.pop("$270", None)
        self.book_data.pop("$593", None)
        self.book_data.pop("$ion_symbol_table", None)
        self.book_data.pop("$419", None)
        self.book_data.pop("$145", None)
        self.book_data.pop("$608", None)
        self.book_data.pop("$692", None)
        self.book_data.pop("$756", None)

        self.book_data.pop("$550", None)

        self.book_data.pop("$265", None)

        self.book_data.pop("$264", None)

        if "$395" in self.book_data:
            resource_path = self.book_data.pop("$395")
            for ent in resource_path.pop("$247", []):
                ent.pop("$175", None)
                ent.pop("$166", None)
                self.check_empty(ent, "%s %s" % ("$395", "$247"))

            self.check_empty(resource_path, "$395")

        self.book_data.pop("$609", None)
        self.book_data.pop("$621", None)

        self.book_data.pop("$597", None)
        self.book_data.pop("$610", None)
        self.book_data.pop("$611", None)

        self.book_data.pop("$387", None)
        self.book_data.pop("$267", None)

        self.check_empty(self.book_data, "Book fragments")

        if self.missing_font_names:
            if REPORT_MISSING_FONTS:
                log.warning(
                    "Missing font family names: %s"
                    % list_symbols(self.missing_font_names)
                )
            else:
                log.info(
                    "Missing referenced font family names: %s"
                    % list_symbols(self.missing_font_names)
                )

        if self.font_names:
            log.info(
                "Present referenced font family names: %s"
                % list_symbols(self.font_names)
            )

    def decompile_to_epub(self):
        return self.epub.generate_epub()
    def organize_fragments_by_type(self, fragment_list):
        font_count = 0
        categorized_data = {}
        last_container_id = None

        for fragment in fragment_list:
            id = fragment.fid
            self.book_symbols.add(id)

            if fragment.ftype == "$270":
                id = last_container_id = IonSymbol(
                    "%s:%s"
                    % (fragment.value.get("$161", ""), fragment.value.get("$409", ""))
                )
            elif fragment.ftype == "$593":
                id = last_container_id
            elif fragment.ftype == "$262":
                id = IonSymbol("%s-font-%03d" % (id, font_count))
                font_count += 1
            elif fragment.ftype == "$387":
                id = IonSymbol("%s:%s" % (id, fragment.value["$215"]))

            dt = categorized_data.setdefault(fragment.ftype, {})

            if id not in dt:
                dt[id] = self.replace_ion_data(fragment.value)
            else:
                log.error("Book contains multiple %s fragments" % str(fragment))

        for category, ids in categorized_data.items():
            if len(ids) == 1:
                id = list(ids)[0]
                if id == category:
                    categorized_data[category] = categorized_data[category][id]
            elif None in ids:
                log.error(
                    "Fragment list contains mixed null/non-null ids of type '%s'"
                    % category
                )

        return categorized_data
    def determine_book_symbol_format(self):
        sym_type_counts = collections.defaultdict(lambda: 0)

        for book_symbol in self.book_symbols:
            symbol_type = self.book.classify_symbol(book_symbol)
            sym_type_counts[symbol_type] += 1

        sym_type_counts[SYM_TYPE.ORIGINAL] += sym_type_counts[SYM_TYPE.UNKNOWN] // 10

        symbol_quorum = (
            sym_type_counts[SYM_TYPE.DICTIONARY]
            + sym_type_counts[SYM_TYPE.SHORT]
            + sym_type_counts[SYM_TYPE.BASE64]
            + sym_type_counts[SYM_TYPE.ORIGINAL]
        ) // 2

        if sym_type_counts[
            SYM_TYPE.SHORT
        ] >= symbol_quorum or "max_id" in self.book_data.get("$538", {}):
            self.book_symbol_format = SYM_TYPE.SHORT
        elif sym_type_counts[SYM_TYPE.DICTIONARY] >= symbol_quorum:
            self.book_symbol_format = SYM_TYPE.DICTIONARY
        elif sym_type_counts[SYM_TYPE.BASE64] >= symbol_quorum:
            self.book_symbol_format = SYM_TYPE.BASE64
        else:
            self.book_symbol_format = SYM_TYPE.ORIGINAL

        if self.book_symbol_format != SYM_TYPE.SHORT:
            log.info("Book symbol format is %s" % self.book_symbol_format)
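    # The format wins by simple majority: a type is chosen once it accounts for at
    # least half of the classifiable symbols (UNKNOWN symbols count at 10% toward
    # ORIGINAL). The presence of document_data "max_id" forces the SHORT format.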
    def unique_part_of_local_symbol(self, symbol):
        name = str(symbol)

        if self.book_symbol_format == SYM_TYPE.SHORT:
            name = re.sub(r"^resource/", "", name, count=1)
        elif self.book_symbol_format == SYM_TYPE.DICTIONARY:
            name = re.sub(r"^G", "", name, count=1)
        elif self.book_symbol_format == SYM_TYPE.BASE64:
            name = re.sub(r"^(resource/)?[a-zA-Z0-9_-]{22}", "", name, count=1)
        else:
            name = re.sub(
                r"^V_[0-9]_[0-9](-PARA|-CHAR)?-[0-9]_[0-9]_[0-9a-f]{12,16}_[0-9a-f]{1,5}",
                "",
                name,
                count=1,
            )
            name = re.sub(
                r"^(fonts/|images/)?(res|resource)_[0-9]_[0-9]_[0-9a-f]{12,16}_[0-9a-f]{1,5}_",
                "",
                name,
                count=1,
            )
            name = re.sub(UUID_MATCH_RE, "", name, count=1)

        while name.startswith("-") or name.startswith("_"):
            name = name[1:]

        return name

    def prefix_unique_part_of_symbol(self, unique_part, prefix):
        if not unique_part:
            return prefix

        if re.match("^[A-Za-z0-9]+(-.+)?$", unique_part) or not re.match(
            "^[A-Za-z]", unique_part
        ):
            return "%s_%s" % (prefix, unique_part)

        return unique_part
    def replace_ion_data(self, f):
        data_type = ion_type(f)

        if data_type is IonAnnotation:
            return self.replace_ion_data(f.value)

        if data_type is IonList:
            return [self.replace_ion_data(fc) for fc in f]

        if data_type is IonSExp:
            return IonSExp([self.replace_ion_data(fc) for fc in f])

        if data_type is IonStruct:
            newf = IonStruct()
            for fk, fv in f.items():
                newf[self.replace_ion_data(fk)] = self.replace_ion_data(fv)

            return newf

        if data_type is IonSymbol:
            self.book_symbols.add(f)

        return f
    def get_fragment(self, ftype=None, fid=None, delete=True):
        if ion_type(fid) not in [IonString, IonSymbol]:
            return fid

        if ftype in self.book_data:
            fragment_container = self.book_data[ftype]
        elif ftype == "$393" and "$394" in self.book_data:
            fragment_container = self.book_data["$394"]
        else:
            fragment_container = {}

        data = (
            fragment_container.pop(fid, None) if delete else fragment_container.get(fid)
        )
        if data is None:
            used_data = self.used_fragments.get((ftype, fid))
            if used_data is not None:
                if RETAIN_USED_FRAGMENTS:
                    log.warning(
                        "book fragment used multiple times: %s %s" % (ftype, fid)
                    )
                    data = used_data
                else:
                    log.error("book fragment used multiple times: %s %s" % (ftype, fid))
                    data = IonStruct()
            else:
                log.error("book is missing fragment: %s %s" % (ftype, fid))
                data = IonStruct()
        else:
            self.used_fragments[(ftype, fid)] = (
                copy.deepcopy(data) if RETAIN_USED_FRAGMENTS else True
            )

        data_name = self.get_fragment_name(data, ftype, delete=False)
        if data_name and data_name != fid:
            log.error("Expected %s named %s but found %s" % (ftype, fid, data_name))
        return data

    def get_named_fragment(self, structure, ftype=None, delete=True, name_symbol=None):
        return self.get_fragment(
            ftype=ftype,
            fid=structure.pop(name_symbol or FRAGMENT_NAME_SYMBOL[ftype]),
            delete=delete,
        )

    def get_location_id(self, structure):
        id = structure.pop("$155", None) or structure.pop("$598", None)
        if id is not None:
            id = str(id)

        return id

    def check_fragment_name(self, fragment_data, ftype, fid, delete=True):
        name = self.get_fragment_name(fragment_data, ftype, delete)
        if name != fid:
            log.error("Fragment %s %s has incorrect name %s" % (ftype, fid, name))

    def get_fragment_name(self, fragment_data, ftype, delete=True):
        return self.get_structure_name(
            fragment_data, FRAGMENT_NAME_SYMBOL[ftype], delete
        )

    def get_structure_name(self, structure, name_key, delete=True):
        return (
            structure.pop(name_key, None) if delete else structure.get(name_key, None)
        )

    def check_empty(self, a_dict, dict_name):
        check_empty(a_dict, dict_name)

    def fix_html_id(self, id):
        return self.epub.fix_html_id(id)
2495
kindle_download_helper/third_party/kfxlib/yj_to_epub_content.py
vendored
Normal file
File diff suppressed because it is too large

279
kindle_download_helper/third_party/kfxlib/yj_to_epub_metadata.py
vendored
Normal file
@@ -0,0 +1,279 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from .message_logging import log
from .python_transition import IS_PYTHON2
from .yj_structure import METADATA_NAMES, SYM_TYPE
from .yj_to_epub_properties import (
    DEFAULT_DOCUMENT_FONT_FAMILY,
    DEFAULT_DOCUMENT_FONT_SIZE,
    DEFAULT_DOCUMENT_LINE_HEIGHT,
    DEFAULT_FONT_NAMES,
    DEFAULT_KC_COMIC_FONT_SIZE,
)

if IS_PYTHON2:
    from .python_transition import str


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


ORIENTATIONS = {
    "$385": "portrait",
    "$386": "landscape",
    "$349": "none",
}
class KFX_EPUB_Metadata(object):
    def process_document_data(self):
        document_data = self.book_data.pop("$538", {})

        if "$433" in document_data:
            orientation_lock_ = document_data.pop("$433")
            if orientation_lock_ in ORIENTATIONS:
                self.epub.orientation_lock = ORIENTATIONS[orientation_lock_]
            else:
                log.error("Unexpected orientation_lock: %s" % orientation_lock_)
                self.epub.orientation_lock = "none"
        else:
            self.epub.orientation_lock = "none"

        if "$436" in document_data:
            selection = document_data.pop("$436")
            if selection not in ["$442", "$441"]:
                log.error("Unexpected document selection: %s" % selection)

        if "$477" in document_data:
            spacing_percent_base = document_data.pop("$477")
            if spacing_percent_base != "$56":
                log.error(
                    "Unexpected document spacing_percent_base: %s"
                    % spacing_percent_base
                )

        if "$581" in document_data:
            pan_zoom = document_data.pop("$581")
            if pan_zoom != "$441":
                log.error("Unexpected document pan_zoom: %s" % pan_zoom)

        if "$665" in document_data:
            self.epub.set_book_type("comic")
            comic_panel_view_mode = document_data.pop("$665")
            if comic_panel_view_mode != "$666":
                log.error(
                    "Unexpected comic panel view mode: %s" % comic_panel_view_mode
                )

        if "$668" in document_data:
            auto_contrast = document_data.pop("$668")
            if auto_contrast != "$573":
                log.error("Unexpected auto_contrast: %s" % auto_contrast)

        document_data.pop("$597", None)

        if "max_id" in document_data:
            max_id = document_data.pop("max_id")
            if self.book_symbol_format != SYM_TYPE.SHORT:
                log.error(
                    "Unexpected document_data max_id=%s for %s symbol format"
                    % (max_id, self.book_symbol_format)
                )
        elif self.book_symbol_format == SYM_TYPE.SHORT:
            log.error(
                "Book has %s symbol format without document_data max_id"
                % self.book_symbol_format
            )

        document_data.pop("yj.semantics.book_theme_metadata", None)
        document_data.pop("yj.semantics.containers_with_semantics", None)
        document_data.pop("yj.semantics.page_number_begin", None)
        document_data.pop("yj.print.settings", None)
        document_data.pop("yj.authoring.auto_panel_settings_auto_mask_color_flag", None)
        document_data.pop("yj.authoring.auto_panel_settings_mask_color", None)
        document_data.pop("yj.authoring.auto_panel_settings_opacity", None)
        document_data.pop("yj.authoring.auto_panel_settings_padding_bottom", None)
        document_data.pop("yj.authoring.auto_panel_settings_padding_left", None)
        document_data.pop("yj.authoring.auto_panel_settings_padding_right", None)
        document_data.pop("yj.authoring.auto_panel_settings_padding_top", None)

        self.reading_orders = document_data.pop("$169", [])

        self.font_name_replacements["default"] = DEFAULT_DOCUMENT_FONT_FAMILY

        doc_style = self.process_content_properties(document_data)

        column_count = doc_style.pop("column-count", "auto")
        if column_count != "auto":
            log.warning("Unexpected document column_count: %s" % column_count)

        self.epub.page_progression_direction = doc_style.pop("direction", "ltr")

        self.default_font_family = doc_style.pop(
            "font-family", DEFAULT_DOCUMENT_FONT_FAMILY
        )

        for default_name in DEFAULT_FONT_NAMES:
            for font_family in self.split_font_family_value(self.default_font_family):
                self.font_name_replacements[default_name] = font_family

        self.default_font_size = doc_style.pop("font-size", DEFAULT_DOCUMENT_FONT_SIZE)
        if self.default_font_size not in [
            DEFAULT_DOCUMENT_FONT_SIZE,
            DEFAULT_KC_COMIC_FONT_SIZE,
        ]:
            log.warning("Unexpected document font-size: %s" % self.default_font_size)

        self.default_line_height = doc_style.pop(
            "line-height", DEFAULT_DOCUMENT_LINE_HEIGHT
        )
        if self.default_line_height != DEFAULT_DOCUMENT_LINE_HEIGHT:
            log.warning(
                "Unexpected document line-height: %s" % self.default_line_height
            )

        self.epub.writing_mode = doc_style.pop("writing-mode", "horizontal-tb")
        if self.epub.writing_mode not in [
            "horizontal-tb",
            "vertical-lr",
            "vertical-rl",
        ]:
            log.warning("Unexpected document writing-mode: %s" % self.epub.writing_mode)

        self.check_empty(doc_style.properties, "document data styles")
        self.check_empty(document_data, "$538")
    def process_content_features(self):
        content_features = self.book_data.pop("$585", {})

        for feature in content_features.pop("$590", []):
            key = "%s/%s" % (feature.pop("$586", ""), feature.pop("$492", ""))
            version_info = feature.pop("$589", {})
            version = version_info.pop("version", {})
            version.pop("$587", "")
            version.pop("$588", "")

            self.check_empty(version_info, "content_features %s version_info" % key)
            self.check_empty(feature, "content_features %s feature" % key)

        if content_features.pop("$598", content_features.pop("$155", "$585")) != "$585":
            log.error("content_features id/kfx_id is incorrect")

        self.check_empty(content_features, "$585")
    def process_metadata(self):
        self.cover_resource = None

        book_metadata = self.book_data.pop("$490", {})

        for categorised_metadata in book_metadata.pop("$491", []):
            category = categorised_metadata.pop("$495")
            for metadata in categorised_metadata.pop("$258"):
                key = metadata.pop("$492")
                self.process_metadata_item(category, key, metadata.pop("$307"))
                self.check_empty(
                    metadata, "categorised_metadata %s/%s" % (category, key)
                )

            self.check_empty(categorised_metadata, "categorised_metadata %s" % category)

        self.check_empty(book_metadata, "$490")

        for key, value in self.book_data.pop("$258", {}).items():
            self.process_metadata_item("", METADATA_NAMES.get(key, str(key)), value)

        if self.epub.fixed_layout and not self.epub.is_print_replica:
            self.epub.set_book_type("comic")
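    # Dispatches one metadata item by "category/key". Items from the categorised
    # $490 fragment arrive as e.g. "kindle_title_metadata/title"; items from the
    # bare $258 fragment arrive with an empty category, e.g. plain "title".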
    def process_metadata_item(self, category, key, value):
        cat_key = "%s/%s" % (category, key) if category else key

        if cat_key == "kindle_title_metadata/ASIN" or cat_key == "ASIN":
            if not self.epub.asin:
                self.epub.asin = value
        elif cat_key == "kindle_title_metadata/author":
            if value:
                self.epub.authors.insert(0, value)
        elif cat_key == "kindle_title_metadata/author_pronunciation":
            if value:
                self.epub.author_pronunciations.insert(0, value)
        elif cat_key == "author":
            if not self.epub.authors:
                self.epub.authors = [a.strip() for a in value.split("&") if a]
        elif (
            cat_key == "kindle_title_metadata/cde_content_type"
            or cat_key == "cde_content_type"
        ):
            self.cde_content_type = value
            if value == "MAGZ":
                self.epub.set_book_type("magazine")
            elif value == "EBSP":
                self.epub.is_sample = True
        elif cat_key == "kindle_title_metadata/description" or cat_key == "description":
            self.epub.description = value.strip()
        elif cat_key == "kindle_title_metadata/cover_image" or cat_key == "cover_image":
            self.cover_resource = value
        elif cat_key == "kindle_title_metadata/dictionary_lookup":
            self.epub.is_dictionary = True
            self.epub.source_language = value.pop("$474")
            self.epub.target_language = value.pop("$163")
            self.check_empty(value, "kindle_title_metadata/dictionary_lookup")
        elif cat_key == "kindle_title_metadata/issue_date":
            self.epub.pubdate = value
        elif cat_key == "kindle_title_metadata/language" or cat_key == "language":
            self.epub.language = self.fix_language(value)
        elif cat_key == "kindle_title_metadata/publisher" or cat_key == "publisher":
            self.epub.publisher = value.strip()
        elif cat_key == "kindle_title_metadata/title" or cat_key == "title":
            if not self.epub.title:
                self.epub.title = value.strip()
        elif cat_key == "kindle_title_metadata/title_pronunciation":
            if not self.epub.title_pronunciation:
                self.epub.title_pronunciation = value.strip()
        elif cat_key == "kindle_ebook_metadata/book_orientation_lock":
            if value != self.epub.orientation_lock:
                log.error(
                    "Conflicting orientation lock values: %s, %s"
                    % (self.epub.orientation_lock, value)
                )
                self.epub.orientation_lock = value
        elif cat_key == "kindle_title_metadata/is_dictionary":
            self.epub.is_dictionary = value
        elif cat_key == "kindle_title_metadata/is_sample":
            self.epub.is_sample = value
        elif cat_key == "kindle_title_metadata/override_kindle_font":
            self.epub.override_kindle_font = value
        elif cat_key == "kindle_capability_metadata/continuous_popup_progression":
            self.epub.set_book_type("comic")
        elif cat_key == "kindle_capability_metadata/yj_fixed_layout":
            self.epub.fixed_layout = True
        elif cat_key == "kindle_capability_metadata/yj_forced_continuous_scroll":
            self.epub.scrolled_continuous = True
        elif cat_key == "kindle_capability_metadata/yj_guided_view_native":
            self.epub.guided_view_native = True
        elif cat_key == "kindle_capability_metadata/yj_publisher_panels":
            self.epub.set_book_type("comic")
            self.epub.region_magnification = True
        elif cat_key == "kindle_capability_metadata/yj_facing_page":
            self.epub.set_book_type("comic")
        elif cat_key == "kindle_capability_metadata/yj_double_page_spread":
            self.epub.set_book_type("comic")
        elif cat_key == "kindle_capability_metadata/yj_textbook":
            self.epub.set_book_type("print replica")
        elif cat_key == "kindle_capability_metadata/yj_illustrated_layout":
            self.epub.illustrated_layout = self.epub.html_cover = True
        elif cat_key == "reading_orders":
            if not self.reading_orders:
                self.reading_orders = value
        elif cat_key == "support_landscape":
            if value is False and self.epub.orientation_lock == "none":
                self.epub.orientation_lock = "portrait"
        elif cat_key == "support_portrait":
            if value is False and self.epub.orientation_lock == "none":
                self.epub.orientation_lock = "landscape"
810
kindle_download_helper/third_party/kfxlib/yj_to_epub_misc.py
vendored
Normal file
@@ -0,0 +1,810 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import operator
import re

from lxml import etree

from .epub_output import SVG, SVG_NAMESPACES, SVG_NS_URI, XLINK_NS_URI, qname
from .ion import IonSExp, IonStruct, IonSymbol, ion_type
from .ion_symbol_table import LocalSymbolTable
from .ion_text import IonText
from .message_logging import log
from .python_transition import IS_PYTHON2
from .utilities import get_url_filename, type_name, urlabspath, urlrelpath
from .yj_to_epub_properties import value_str
from .yj_versions import KNOWN_SUPPORTED_FEATURES

if IS_PYTHON2:
    from .python_transition import repr, str, urllib
else:
    import urllib.parse


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


DEVICE_SCREEN_NARROW_PX = 1200
DEVICE_SCREEN_WIDE_PX = 1920

RENDER_HTML_PLUGIN_AS = "iframe"


class KFX_EPUB_Misc(object):
    def set_condition_operators(self):
        if self.epub.orientation_lock == "landscape":
            screen_width = DEVICE_SCREEN_WIDE_PX
            screen_height = DEVICE_SCREEN_NARROW_PX
        else:
            screen_width = DEVICE_SCREEN_NARROW_PX
            screen_height = DEVICE_SCREEN_WIDE_PX

        self.condition_operators = {
            "$305": (0, screen_height),
            "$304": (0, screen_width),
            "$300": (0, True),
            "$301": (0, True),
            "$183": (0, 0),
            "$302": (0, screen_width),
            "$303": (0, screen_height),
            "$525": (0, (screen_width > screen_height)),
            "$526": (0, (screen_width < screen_height)),
            "$660": (0, True),
            "$293": (1, operator.not_),
            "$266": (1, None),
            "$750": (1, None),
            "$659": (None, None),
            "$292": (2, operator.and_),
            "$291": (2, operator.or_),
            "$294": (2, operator.eq),
            "$295": (2, operator.ne),
            "$296": (2, operator.gt),
            "$297": (2, operator.ge),
            "$298": (2, operator.lt),
            "$299": (2, operator.le),
            "$516": (2, operator.add),
            "$517": (2, operator.sub),
            "$518": (2, operator.mul),
            "$519": (2, operator.truediv),
        }
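
    # Each table entry maps a YJ condition-operator symbol to a pair of
    # (argument count, handler): zero-argument entries are constants (screen
    # metrics and capability flags for an assumed 1200x1920 device), while
    # unary and binary entries are Python operators applied to recursively
    # evaluated operands. evaluate_condition() below dispatches on this table.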
    def evaluate_binary_condition(self, condition):
        value = self.evaluate_condition(condition)
        if value not in {True, False}:
            log.error(
                "Condition has non-binary result (%s): %s"
                % (str(value), str(condition))
            )
            return False

        return value

    def evaluate_condition(self, condition):
        if ion_type(condition) is IonSExp:
            op = condition[0]
            num = len(condition) - 1
        else:
            op = condition
            num = 0

        if (ion_type(op) is not IonSymbol) or (op not in self.condition_operators):
            log.error("Condition operator is unknown: %s" % str(condition))
            return False

        nargs, func = self.condition_operators[op]

        if nargs is None:
            if op == "$659":
                if tuple(condition[1:]) in KNOWN_SUPPORTED_FEATURES:
                    return True

            log.error("yj.supports feature unknown: %s" % repr(condition))
            return False

        if nargs != num:
            log.error(
                "Condition operator has wrong number of arguments: %s" % str(condition)
            )
            return False

        if nargs == 0:
            return func

        if nargs == 1:
            if op == "$266":
                return 0

            if op == "$750":
                if condition[1] == "$752":
                    return True

                if condition[1] == "$753":
                    return False

                log.error("yj.layout_type unknown: %s" % condition[1])
                return False

            return func(self.evaluate_condition(condition[1]))

        return func(
            self.evaluate_condition(condition[1]), self.evaluate_condition(condition[2])
        )
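
    # Fixed-layout block images are re-wrapped in an inline SVG element so
    # the bitmap scales with the viewport while keeping its aspect ratio.
    # The div and img styles are consumed and validated; anything unexpected
    # is logged rather than silently dropped.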
    def add_svg_wrapper_to_block_image(
        self, content_elem, book_part, fixed_height=0, fixed_width=0
    ):
        if len(content_elem) != 1:
            log.error(
                "Incorrect div content for SVG wrapper: %s"
                % etree.tostring(content_elem)
            )

        for image_div in content_elem.findall("*"):
            if (
                image_div.tag == "div"
                and len(image_div) == 1
                and image_div[0].tag == "img"
            ):
                div_style = self.get_style(image_div)
                div_style.pop("-kfx-style-name", "")
                div_style.pop("font-size", "")
                div_style.pop("line-height", "")

                img = image_div[0]
                img_style = self.get_style(img)
                img_style.pop("-kfx-style-name", "")
                img_style.pop("font-size", "")
                img_style.pop("line-height", "")
                iheight = img_style.pop("height", "")
                iwidth = img_style.pop("width", "")
                try:
                    img_file = self.epub.oebps_files[
                        get_url_filename(
                            urlabspath(img.get("src"), ref_from=book_part.filename)
                        )
                    ]

                    img_height = img_file.height
                    img_width = img_file.width
                except Exception as e:
                    log.error(
                        "Failed to locate image file for SVG wrapper: %s" % str(e)
                    )
                    return

                orig_int_height = int_height = px_to_int(iheight)
                orig_int_width = int_width = px_to_int(iwidth)

                if (int_height and fixed_height and int_height != fixed_height) or (
                    int_width and fixed_width and int_width != fixed_width
                ):
                    log.error(
                        "Unexpected image style for SVG wrapper (fixed h=%d, w=%d): %s"
                        % (fixed_height, fixed_width, etree.tostring(image_div))
                    )

                if int_height and int_width:
                    img_aspect = float(int_height) / float(int_width)
                    svg_aspect = float(img_height) / float(img_width)
                    if abs(img_aspect - svg_aspect) > 0.01:
                        log.error(
                            "Image (h=%d, w=%d) aspect ratio %f does not match SVG wrapper (h=%d, w=%d) %f"
                            % (
                                img_height,
                                img_width,
                                img_aspect,
                                int_height,
                                int_width,
                                svg_aspect,
                            )
                        )
                else:
                    int_height = img_height
                    int_width = img_width

                if not (
                    div_style.pop("text-align", "center") == "center"
                    and div_style.pop("text-indent", "0") == "0"
                    and img_style.pop("position", "absolute") == "absolute"
                    and img_style.pop("top", "0") == "0"
                    and img_style.pop("left", "0") == "0"
                    and (iheight == "" or orig_int_height)
                    and (
                        iwidth == ""
                        or orig_int_width
                        or re.match(r"^(100|9[5-9].*)%$", iwidth)
                    )
                    and len(img_style) == 0
                    and len(div_style) == 0
                ):
                    log.error(
                        "Unexpected image style for SVG wrapper (img h=%d, w=%d): %s"
                        % (img_height, img_width, etree.tostring(image_div))
                    )

                image_div.remove(img)

                svg = etree.SubElement(
                    image_div,
                    SVG,
                    nsmap=SVG_NAMESPACES,
                    attrib={
                        "version": "1.1",
                        "preserveAspectRatio": "xMidYMid meet",
                        "viewBox": "0 0 %d %d" % (int_width, int_height),
                        "height": "100%",
                        "width": "100%",
                    },
                )

                self.move_anchors(img, svg)

                etree.SubElement(
                    svg,
                    qname(SVG_NS_URI, "image"),
                    attrib={
                        qname(XLINK_NS_URI, "href"): img.get("src"),
                        "height": "%d" % int_height,
                        "width": "%d" % int_width,
                    },
                )

            else:
                log.error(
                    "Incorrect image content for SVG wrapper: %s"
                    % etree.tostring(image_div)
                )
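
    # Lays out a row of fixed-layout images side by side by absolutely
    # positioning each one at a running left offset equal to the sum of
    # the widths of the images before it.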
    def horizontal_fxl_block_images(self, content_elem, book_part):
        left = 0
        for image_div in content_elem.findall("*"):
            if (
                image_div.tag == "div"
                and len(image_div) == 1
                and image_div[0].tag == "img"
            ):
                img = image_div[0]
                img_file = self.epub.oebps_files[
                    get_url_filename(
                        urlabspath(img.get("src"), ref_from=book_part.filename)
                    )
                ]
                img_style = self.get_style(img)

                if (
                    "position" in img_style
                    or "top" in img_style
                    or "left" in img_style
                    or "height" in img_style
                    or "width" in img_style
                ):
                    log.error(
                        "Unexpected image style for horizontal fxl: %s"
                        % etree.tostring(image_div)
                    )

                img_style["position"] = "absolute"
                img_style["top"] = value_str(0, "px")
                img_style["left"] = value_str(left, "px")
                img_style["height"] = value_str(img_file.height, "px")
                img_style["width"] = value_str(img_file.width, "px")
                self.set_style(img, img_style)

                left += img_file.width

            else:
                log.error(
                    "Incorrect image content for horizontal fxl: %s"
                    % etree.tostring(image_div)
                )
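
    # KVG shapes become SVG elements: a "$273" shape is converted to an SVG
    # path, while a "$270" shape pulls the matching container out of the
    # pending content list and turns it into an SVG text element. YJ style
    # symbols are then mapped onto the corresponding SVG presentation
    # attributes.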
    def process_kvg_shape(self, parent, shape, content_list, book_part, writing_mode):
        shape_type = shape.pop("$159")
        if shape_type == "$273":
            elem = etree.SubElement(
                parent,
                qname(SVG_NS_URI, "path"),
                attrib={"d": self.process_path(shape.pop("$249"))},
            )

        elif shape_type == "$270":
            source = shape.pop("$474")

            for i, content in enumerate(content_list):
                if ion_type(content) is IonSymbol:
                    content = self.get_fragment(ftype="$608", fid=content)

                if content.get("$155") == source or content.get("$598") == source:
                    break
            else:
                log.error("Missing KVG container content ID: %s" % source)
                return

            content_list.pop(i)
            self.process_content(content, parent, book_part, writing_mode)
            elem = parent[-1]

            if elem.tag != "div":
                log.error("Unexpected non-text content in KVG container: %s" % elem.tag)
                return

            elem.tag = qname(SVG_NS_URI, "text")

        else:
            log.error("Unexpected shape type: %s" % shape_type)
            return

        for yj_property_name, svg_attrib in [
            ("$70", "fill"),
            ("$72", "fill-opacity"),
            ("$75", "stroke"),
            ("$77", "stroke-linecap"),
            ("$529", "stroke-linejoin"),
            ("$530", "stroke-miterlimit"),
            ("$76", "stroke-width"),
            ("$98", "transform"),
        ]:
            if yj_property_name in shape:
                elem.set(
                    svg_attrib,
                    self.property_value(
                        yj_property_name, shape.pop(yj_property_name), svg=True
                    ),
                )

        if "stroke" in elem.attrib and "fill" not in elem.attrib:
            elem.set("fill", "none")

        self.check_empty(shape, "shape")
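
    # Path data is stored as a flat list of numeric opcodes and coordinates.
    # Opcode 0 is moveto (M, 2 args), 1 is lineto (L, 2 args), 2 is a
    # quadratic curve (Q, 4 args), 3 is a cubic curve (C, 6 args) and 4
    # closes the path (Z, no args); coordinates are scaled through
    # adjust_pixel_value() and joined into an SVG path "d" string.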
    def process_path(self, path):
        if ion_type(path) is IonStruct:
            path_bundle_name = path.pop("name")
            path_index = path.pop("$403")
            self.check_empty(path, "path")

            if (
                "$692" not in self.book_data
                or path_bundle_name not in self.book_data["$692"]
            ):
                log.error("Missing book path_bundle: %s" % path_bundle_name)
                return ""

            return self.process_path(
                self.book_data["$692"][path_bundle_name]["$693"][path_index]
            )

        p = list(path)
        d = []

        def process_instruction(inst, n_args, pixels=True):
            d.append(inst)

            for j in range(n_args):
                if len(p) == 0:
                    log.error("Incomplete path instruction in %s" % str(path))
                    return

                v = p.pop(0)
                if pixels:
                    v = self.adjust_pixel_value(v)

                d.append(value_str(v))

        while len(p) > 0:
            inst = p.pop(0)
            if inst == 0:
                process_instruction("M", 2)

            elif inst == 1:
                process_instruction("L", 2)

            elif inst == 2:
                process_instruction("Q", 4)

            elif inst == 3:
                process_instruction("C", 6)

            elif inst == 4:
                process_instruction("Z", 0)

            else:
                log.error(
                    "Unexpected path instruction %s in %s" % (str(inst), str(path))
                )
                break

        return " ".join(d)
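
    # Same opcode stream as process_path(), but rendered as a CSS polygon()
    # shape: only moveto/lineto (0/1) and close (4) are meaningful here, and
    # the fractional coordinates are emitted as percentages.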
    def process_polygon(self, path):
        def percent_value_str(v):
            return value_str(v * 100, "%", emit_zero_unit=True)

        d = []

        i = 0
        ln = len(path)
        while i < ln:
            inst = path[i]
            if inst == 0 or inst == 1:
                if i + 3 > ln:
                    log.error("Bad path instruction in %s" % str(path))
                    break

                d.append(
                    "%s %s"
                    % (percent_value_str(path[i + 1]), percent_value_str(path[i + 2]))
                )
                i += 3

            elif inst == 4:
                i += 1

            else:
                log.error(
                    "Unexpected path instruction %s in %s" % (str(inst), str(path))
                )
                break

        return "polygon(%s)" % (", ".join(d))
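
    # A six-value affine matrix [a b c d e f] is decomposed into the
    # equivalent translate/scale/rotate shorthand where the pattern is
    # recognized (pure translation, axis-aligned scale, or 90/180 degree
    # rotation); anything else falls back to an explicit matrix() and is
    # logged as unexpected.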
    def process_transform(self, vals, svg):
        if svg:
            px = ""
            sep = " "
        else:
            px = "px"
            sep = ","

        if len(vals) == 6:
            vals[4] = self.adjust_pixel_value(vals[4])
            vals[5] = self.adjust_pixel_value(vals[5])

            if vals[4:6] == [0.0, 0.0]:
                translate = ""
            else:
                translate = "translate(%s%s%s) " % (
                    value_str(vals[4], px),
                    sep,
                    value_str(vals[5], px),
                )

            if vals[0:4] == [1.0, 0.0, 0.0, 1.0] and translate:
                return translate.strip()

            if vals[1:3] == [0.0, 0.0]:
                if vals[0] == vals[3]:
                    return translate + ("scale(%s)" % value_str(vals[0]))

                return translate + (
                    "scale(%s%s%s)" % (value_str(vals[0]), sep, value_str(vals[3]))
                )

            if vals[0:4] == [0.0, 1.0, -1.0, 0.0]:
                return translate + "rotate(-90deg)"

            if vals[0:4] == [0.0, -1.0, 1.0, 0.0]:
                return translate + "rotate(90deg)"

            if vals[0:4] == [-1.0, 0.0, 0.0, -1.0]:
                return translate + "rotate(180deg)"

            log.error("Unexpected transform matrix: %s" % str(vals))
            return "matrix(%s)" % (sep.join([value_str(v) for v in vals]))

        log.error("Unexpected transform: %s" % str(vals))
        return "?"
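
    # Plugins are interactive KFX resources described by an Ion manifest.
    # Each supported plugin type (html article, audio, button, hyperlink,
    # image_sequence, scrollable, slideshow, video, webview, zoomable) is
    # rewritten as the nearest static XHTML equivalent; unknown types fall
    # back to an <object> element with alt text.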
    def process_plugin(
        self, resource_name, alt_text, content_elem, book_part, is_html=False
    ):
        res = self.process_external_resource(resource_name, save=False, is_plugin=True)

        if is_html or res.mime == "plugin/kfx-html-article":
            src = urlrelpath(
                self.process_external_resource(
                    resource_name, is_plugin=True, save_referred=True
                ).filename,
                ref_from=book_part.filename,
            )

            if RENDER_HTML_PLUGIN_AS == "iframe":
                content_elem.tag = "iframe"
                content_elem.set("src", src)
                self.add_style(
                    content_elem,
                    {
                        "height": "100%",
                        "width": "100%",
                        "border-bottom-style": "none",
                        "border-left-style": "none",
                        "border-right-style": "none",
                        "border-top-style": "none",
                    },
                )
            elif RENDER_HTML_PLUGIN_AS == "object":
                content_elem.tag = "object"
                content_elem.set("data", src)
                content_elem.set("type", "text/html")
                self.add_style(
                    content_elem,
                    {
                        "height": "100%",
                        "width": "100%",
                        "border-bottom-style": "none",
                        "border-left-style": "none",
                        "border-right-style": "none",
                        "border-top-style": "none",
                    },
                )
            else:
                content_elem.tag = "a"
                content_elem.set("href", src)
                content_elem.text = "[click here to read the content]"

        elif res.format == "$284":
            content_elem.tag = "img"
            content_elem.set(
                "src",
                urlrelpath(
                    self.process_external_resource(resource_name).filename,
                    ref_from=book_part.filename,
                ),
            )
            content_elem.set("alt", alt_text)

        else:
            manifest_raw_media = res.raw_media.decode("utf-8")

            manifest_symtab = LocalSymbolTable(
                context="plugin %s" % resource_name, ignore_undef=True
            )

            try:
                manifest_ = IonText(symtab=manifest_symtab).deserialize_annotated_value(
                    manifest_raw_media, import_symbols=None
                )
            except Exception:
                log.error("Exception processing plugin %s" % resource_name)
                raise

            manifest_symtab.report()
            plugin_type = manifest_.get_annotation()
            manifest = manifest_.value

            if plugin_type == "audio":
                self.process_external_resource(
                    resource_name, save=False, is_plugin=True, process_referred=True
                )

                content_elem.tag = "audio"
                content_elem.set("controls", "")
                src = self.uri_reference(
                    manifest["facets"]["media"]["uri"], manifest_external_refs=True
                )
                content_elem.set("src", urlrelpath(src, ref_from=book_part.filename))

                player = manifest["facets"]["player"]
                for image_refs in ["play_images", "pause_images"]:
                    for uri in player.get(image_refs, []):
                        self.uri_reference(uri, save=False)

            elif plugin_type == "button":
                RENDER_BUTTON_PLUGIN = True
                content_elem.tag = "div"

                for image in manifest["facets"]["images"]:
                    if image["role"] != "upstate":
                        log.warning(
                            "Unknown button image role %s in %s"
                            % (image["role"], resource_name)
                        )

                    if RENDER_BUTTON_PLUGIN:
                        img = etree.SubElement(content_elem, "img")
                        img.set(
                            "src",
                            urlrelpath(
                                self.uri_reference(image["uri"]),
                                ref_from=book_part.filename,
                            ),
                        )
                        img.set("alt", alt_text)
                        self.add_style(img, {"max-width": "100%"})
                    else:
                        self.uri_reference(image["uri"], save=False)

                clicks = manifest["events"]["click"]

                for click in clicks if isinstance(clicks, list) else [clicks]:
                    if click["name"] != "change_state":
                        log.warning(
                            "Unknown button event click name %s in %s"
                            % (click["name"], resource_name)
                        )

                self.process_external_resource(
                    resource_name, is_plugin=True, save=False, process_referred=True
                )

            elif plugin_type == "hyperlink":
                content_elem.tag = "a"
                self.add_style(content_elem, {"height": "100%", "width": "100%"})

                uri = manifest["facets"]["uri"]
                if uri:
                    content_elem.set(
                        "href",
                        urlrelpath(
                            self.uri_reference(uri), ref_from=book_part.filename
                        ),
                    )

            elif plugin_type == "image_sequence":
                content_elem.tag = "div"

                for image in manifest["facets"]["images"]:
                    img = etree.SubElement(content_elem, "img")
                    img.set(
                        "src",
                        urlrelpath(
                            self.uri_reference(image["uri"]),
                            ref_from=book_part.filename,
                        ),
                    )
                    img.set("alt", alt_text)

            elif plugin_type in ["scrollable", "slideshow"]:
                content_elem.tag = "div"

                if manifest["properties"].get("initial_visibility") == "hide":
                    self.add_style(content_elem, {"visibility": "hidden"})

                if "alt_text" in manifest["properties"]:
                    alt_text = manifest["properties"]["alt_text"]

                for child in manifest["facets"]["children"]:
                    self.process_plugin_uri(
                        child["uri"], child["bounds"], content_elem, book_part
                    )

                if plugin_type == "scrollable":
                    self.process_external_resource(
                        resource_name, is_plugin=True, save=False, process_referred=True
                    )

            elif plugin_type == "video":
                content_elem.tag = "video"

                if manifest["properties"].get("user_interaction") == "enabled":
                    content_elem.set("controls", "")

                if (
                    manifest.get("events", {}).get("enter_view", {}).get("name")
                    == "start"
                ):
                    content_elem.set("autoplay", "")

                if (
                    manifest["properties"].get("play_context", {}).get("loop_count", 0)
                    < 0
                ):
                    content_elem.set("loop", "")

                if "poster" in manifest["facets"]:
                    content_elem.set(
                        "poster",
                        urlrelpath(
                            self.uri_reference(manifest["facets"]["poster"]["uri"]),
                            ref_from=book_part.filename,
                        ),
                    )

                if "first_frame" in manifest["facets"]:
                    self.uri_reference(
                        manifest["facets"]["first_frame"]["uri"], save=False
                    )

                alt_text = alt_text or "Cannot display %s content" % plugin_type

                src = self.uri_reference(
                    manifest["facets"]["media"]["uri"], manifest_external_refs=True
                )

                content_elem.set("src", urlrelpath(src, ref_from=book_part.filename))

                dummy_elem = etree.Element("dummy")
                while len(content_elem) > 0:
                    e = content_elem[0]
                    content_elem.remove(e)
                    dummy_elem.append(e)

                self.move_anchors(dummy_elem, content_elem)

            elif plugin_type == "webview":
                self.process_external_resource(
                    resource_name, is_plugin=True, save=False, save_referred=True
                )
                uri = manifest["facets"]["uri"]
                purl = urllib.parse.urlparse(uri)

                if purl.scheme == "kfx":
                    self.process_plugin(
                        urllib.parse.unquote(purl.netloc + purl.path),
                        alt_text,
                        content_elem,
                        book_part,
                        is_html=True,
                    )
                else:
                    log.error("Unexpected webview plugin URI scheme: %s" % uri)

            elif plugin_type == "zoomable":
                content_elem.tag = "img"
                content_elem.set(
                    "src",
                    urlrelpath(
                        self.uri_reference(manifest["facets"]["media"]["uri"]),
                        ref_from=book_part.filename,
                    ),
                )
                content_elem.set("alt", alt_text)

            else:
                log.error(
                    "Unknown plugin type %s in resource %s"
                    % (plugin_type, resource_name)
                )

                content_elem.tag = "object"
                src = self.process_external_resource(
                    resource_name, is_plugin=True, save_referred=True
                ).filename
                content_elem.set("data", urlrelpath(src, ref_from=book_part.filename))
                content_elem.set("type", self.epub.oebps_files[src].mimetype)

            if len(content_elem) == 0:
                content_elem.text = (
                    alt_text or "Cannot display %s content" % plugin_type
                )
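
    # Child plugins are referenced through kfx: URIs together with a bounds
    # struct; each child is expanded recursively into a placeholder element
    # and then positioned by process_bounds().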
    def process_plugin_uri(self, uri, bounds, content_elem, book_part):
        purl = urllib.parse.urlparse(uri)

        if purl.scheme == "kfx":
            child_elem = etree.SubElement(content_elem, "plugin-temp")
            self.process_plugin(
                urllib.parse.unquote(purl.netloc + purl.path), "", child_elem, book_part
            )
            self.process_bounds(child_elem, bounds)
        else:
            log.error("Unexpected plugin URI scheme: %s" % uri)
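
    # Maps the manifest bound keys x/y/h/w onto the CSS properties
    # left/top/height/width; an x or y bound additionally forces absolute
    # positioning so the offsets take effect.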
    def process_bounds(self, elem, bounds):
        for bound, property_name in [
            ("x", "left"),
            ("y", "top"),
            ("h", "height"),
            ("w", "width"),
        ]:
            if bound in bounds:
                bound_value = bounds[bound]
                if ion_type(bound_value) is IonStruct:
                    unit = bound_value.pop("unit")
                    value = value_str(
                        bound_value.pop("value"), "%" if unit == "percent" else unit
                    )
                    self.check_empty(bound_value, "Bound %s value" % property_name)

                    self.add_style(elem, {property_name: value}, replace=True)

                    if bound in ["x", "y"]:
                        self.add_style(elem, {"position": "absolute"})
                else:
                    log.error(
                        "Unexpected bound data type %s: %s"
                        % (type_name(bound), repr(bound))
                    )

def px_to_int(s):
    m = re.match(r"^([0-9]+)(px)?$", s)
    return int(m.group(1)) if m else 0
557
kindle_download_helper/third_party/kfxlib/yj_to_epub_navigation.py
vendored
Normal file
@@ -0,0 +1,557 @@
from __future__ import absolute_import, division, print_function, unicode_literals

from .epub_output import TocEntry
from .message_logging import log
from .python_transition import IS_PYTHON2
from .utilities import make_unique_name, urlrelpath
from .yj_position_location import DEBUG_PAGES
from .yj_structure import APPROXIMATE_PAGE_LIST

if IS_PYTHON2:
    from .python_transition import str, urllib
else:
    import urllib.parse


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


KEEP_APPROX_PG_NUMS = False

REPORT_DUPLICATE_PAGES = False
PREVENT_DUPLICATE_PAGE_LABELS = False
PREVENT_DUPLICATE_PAGE_TARGETS = False

GUIDE_TYPE_OF_LANDMARK_TYPE = {
    "$233": "cover",
    "$396": "text",
    "$269": "text",
    "$212": "toc",
}


PERIODICAL_NCX_CLASSES = {
    0: "section",
    1: "article",
}
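

# Navigation in KFX is a set of nav containers attached to reading orders.
# process_navigation() matches each reading order to its book_navigation
# entry and hands every container to process_nav_container(), which emits
# the EPUB TOC (NCX), guide and page-map entries.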
class KFX_EPUB_Navigation(object):
    def process_navigation(self):
        for section_nav in self.book_data.pop("$390", []):
            section_name = section_nav.pop("$174")
            for nav_container in section_nav.pop("$392", []):
                self.nav_container_section[nav_container] = section_name

            self.check_empty(section_nav, "section_navigation")

        book_navigations = self.book_data.pop("$389", [])

        for reading_order in self.reading_orders:
            reading_order_name = reading_order.get("$178", "")

            for i, book_navigation in enumerate(book_navigations):
                if book_navigation.get("$178", "") == reading_order_name:
                    book_navigations.pop(i)
                    book_navigation.pop("$178", None)

                    for nav_container_ in book_navigation.pop("$392"):
                        nav_container = self.get_fragment(
                            ftype="$391", fid=nav_container_
                        )
                        self.process_nav_container(
                            nav_container, nav_container_, reading_order_name
                        )
                    self.check_empty(book_navigation, "book_navigation")

                    break
            else:
                log.warning(
                    'Failed to locate navigation for reading order "%s"'
                    % reading_order_name
                )

        self.check_empty(book_navigations, "book_navigation")

        nav_container = self.book_data.pop("$391", {})
        if not self.book.is_kpf_prepub:
            self.check_empty(nav_container, "nav_container")

        self.check_empty(self.book_data.pop("$394", {}), "conditional_nav_group_unit")
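
    # A nav container is either a list of imports (nested containers) or a
    # list of nav units. Type "$212"/"$213"/"$214" units feed the NCX table
    # of contents, "$236" units become guide landmarks and "$237" units
    # become page-list targets; approximate page numbers are dropped unless
    # KEEP_APPROX_PG_NUMS or DEBUG_PAGES is set.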
    def process_nav_container(
        self, nav_container, nav_container_name, reading_order_name
    ):
        nav_container.pop("mkfx_id", None)
        nav_container_name = nav_container.pop("$239", nav_container_name)
        section_name = self.nav_container_section.get(nav_container_name)
        nav_type = nav_container.pop("$235")
        if nav_type not in {"$212", "$236", "$237", "$213", "$214"}:
            log.error(
                "nav_container %s has unknown type: %s" % (nav_container_name, nav_type)
            )

        if "imports" in nav_container:
            for import_name in nav_container.pop("imports"):
                self.process_nav_container(
                    self.book_data["$391"].pop(import_name),
                    nav_container_name,
                    reading_order_name,
                )
        else:
            for nav_unit_ in nav_container.pop("$247"):
                nav_unit = self.get_fragment(ftype="$393", fid=nav_unit_)
                nav_unit.pop("mkfx_id", None)

                if nav_type in {"$212", "$214", "$213"}:
                    self.process_nav_unit(
                        nav_type,
                        nav_unit,
                        self.epub.ncx_toc,
                        nav_container_name,
                        section_name,
                    )

                elif nav_type == "$236":
                    label = self.get_representation(nav_unit)[0]
                    nav_unit_name = nav_unit.pop("$240", label)
                    target_position = self.get_position(nav_unit.pop("$246"))
                    landmark_type = nav_unit.pop("$238", None)

                    if landmark_type:
                        guide_type = GUIDE_TYPE_OF_LANDMARK_TYPE.get(landmark_type)
                        if guide_type is None:
                            log.warning("Unexpected landmark_type: %s" % landmark_type)
                            guide_type = landmark_type

                        if label == "cover-nav-unit":
                            label = ""

                        anchor_name = self.unique_anchor_name(
                            str(nav_unit_name) or guide_type
                        )
                        self.register_anchor(anchor_name, target_position)
                        self.epub.add_guide_entry(guide_type, label, anchor=anchor_name)

                elif nav_type == "$237":
                    label = self.get_representation(nav_unit)[0]
                    nav_unit_name = nav_unit.pop("$240", "page_list_entry")
                    target_position = self.get_position(nav_unit.pop("$246"))

                    if nav_unit_name != "page_list_entry":
                        log.warning(
                            "Unexpected page_list nav_unit_name: %s" % nav_unit_name
                        )

                    if label and (
                        KEEP_APPROX_PG_NUMS
                        or DEBUG_PAGES
                        or nav_container_name != APPROXIMATE_PAGE_LIST
                    ):
                        anchor_name = "page_%s" % label
                        if len(self.reading_orders) > 1:
                            anchor_name = "%s_%s" % (reading_order_name, anchor_name)

                        anchor_name = self.unique_anchor_name(anchor_name)
                        anchor_id = self.register_anchor(anchor_name, target_position)

                        if (
                            PREVENT_DUPLICATE_PAGE_TARGETS
                            and anchor_id in self.page_anchor_id_label
                        ):
                            log.warning(
                                "Page %s is at the same position as page %s"
                                % (label, self.page_anchor_id_label[anchor_id])
                            )
                        else:
                            self.page_anchor_id_label[anchor_id] = label

                            if self.page_label_anchor_id.get(label) == anchor_id:
                                if (
                                    REPORT_DUPLICATE_PAGES
                                    and label not in self.reported_duplicate_page_label
                                ):
                                    log.warning(
                                        "Page %s occurs multiple times with same position"
                                        % label
                                    )
                                    self.reported_duplicate_page_label.add(label)
                            elif (
                                PREVENT_DUPLICATE_PAGE_LABELS
                                and len(self.reading_orders) == 1
                            ):
                                log.warning(
                                    "Page %s occurs multiple times with different positions"
                                    % label
                                )
                            else:
                                self.page_label_anchor_id[label] = anchor_id
                                self.epub.add_pagemap_entry(label, anchor=anchor_name)

                self.check_empty(
                    nav_unit, "nav_container %s nav_unit" % nav_container_name
                )

        self.check_empty(nav_container, "nav_container %s" % nav_container_name)
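
    # TOC nav units may nest, either directly ("$247") or through
    # orientation-specific entry sets ("$248"); a subtree whose orientation
    # does not match the book's orientation lock is discarded.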
    def process_nav_unit(
        self, nav_type, nav_unit, ncx_toc, nav_container_name, section_name
    ):
        label, icon = self.get_representation(nav_unit)
        if label:
            label = label.strip()

        description = nav_unit.pop("$154", None)
        if description:
            description = description.strip()

        nav_unit_name = nav_unit.pop("$240", label)
        nav_unit.pop("mkfx_id", None)

        nested_toc = []

        for entry in nav_unit.pop("$247", []):
            nested_nav_unit = self.get_fragment(ftype="$393", fid=entry)
            self.process_nav_unit(
                nav_type, nested_nav_unit, nested_toc, nav_container_name, section_name
            )

        for entry_set in nav_unit.pop("$248", []):
            for entry in entry_set.pop("$247", []):
                nested_nav_unit = self.get_fragment(ftype="$393", fid=entry)
                self.process_nav_unit(
                    nav_type,
                    nested_nav_unit,
                    nested_toc,
                    nav_container_name,
                    section_name,
                )

            orientation = entry_set.pop("$215")
            if orientation == "$386":
                if self.epub.orientation_lock != "landscape":
                    nested_toc = []
            elif orientation == "$385":
                if self.epub.orientation_lock == "landscape":
                    nested_toc = []
            else:
                log.error("Unknown entry set orientation: %s" % orientation)

            if section_name and nav_type == "$214":
                for i, entry in enumerate(nested_toc):
                    self.navto_anchor[(section_name, float(i))] = entry.anchor

            self.check_empty(
                entry_set,
                "nav_container %s %s entry_set" % (nav_container_name, nav_type),
            )

        if "$246" in nav_unit:
            anchor_name = "toc%d_%s" % (self.toc_entry_count, nav_unit_name)
            self.toc_entry_count += 1

            target_position = self.get_position(nav_unit.pop("$246"))
            self.register_anchor(anchor_name, target_position)
        else:
            anchor_name = None

        if (not label) and (not anchor_name):
            ncx_toc.extend(nested_toc)
        else:
            ncx_toc.append(
                TocEntry(
                    label,
                    anchor=anchor_name,
                    children=nested_toc,
                    description=description,
                    icon=self.process_external_resource(icon).filename
                    if icon
                    else None,
                )
            )

        self.check_empty(
            nav_unit, "nav_container %s %s nav_unit" % (nav_container_name, nav_type)
        )
    def unique_anchor_name(self, anchor_name):
        if anchor_name and anchor_name not in self.anchor_positions:
            return anchor_name

        count = 0
        while True:
            new_anchor_name = "%s:%d" % (anchor_name, count)

            if new_anchor_name not in self.anchor_positions:
                return new_anchor_name

            count += 1

    def process_anchors(self):
        anchors = self.book_data.pop("$266", {})
        for anchor_name, anchor in anchors.items():
            self.check_fragment_name(anchor, "$266", anchor_name)

            if "$186" in anchor:
                self.anchor_uri[str(anchor_name)] = anchor.pop("$186")
            elif "$183" in anchor:
                self.register_anchor(
                    str(anchor_name), self.get_position(anchor.pop("$183"))
                )

            anchor.pop("$597", None)

            self.check_empty(anchor, "anchor %s" % anchor_name)

    def get_position(self, position):
        id = self.get_location_id(position)
        offset = position.pop("$143", 0)
        self.check_empty(position, "position")
        return (id, offset)

    def get_representation(self, entry):
        label = ""
        icon = None

        if "$241" in entry:
            representation = entry.pop("$241")

            if "$245" in representation:
                icon = representation.pop("$245")
                self.process_external_resource(icon)
                label = str(icon)

            if "$244" in representation:
                label = representation.pop("$244")

            self.check_empty(representation, "nav_container representation")

        return (label, icon)

    def position_str(self, position):
        return "%s.%d" % position
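
    # Anchor bookkeeping: anchor_positions maps an anchor name to every
    # (element id, offset) position it was registered at, while
    # position_anchors is the reverse index used by process_position() to
    # attach an id attribute to the element that ends up at that position.
    # All anchors sharing a position resolve to the id of the first one.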
    def register_anchor(self, anchor_name, position):
        if self.DEBUG:
            log.debug(
                "register_anchor %s = %s" % (anchor_name, self.position_str(position))
            )

        if not anchor_name:
            raise Exception(
                "register_anchor: anchor name is missing for position %s"
                % self.position_str(position)
            )

        if anchor_name not in self.anchor_positions:
            self.anchor_positions[anchor_name] = set()

        self.anchor_positions[anchor_name].add(position)

        eid, offset = position
        if eid not in self.position_anchors:
            self.position_anchors[eid] = {}

        if offset not in self.position_anchors[eid]:
            self.position_anchors[eid][offset] = []

        if anchor_name not in self.position_anchors[eid][offset]:
            self.position_anchors[eid][offset].append(anchor_name)

        return self.get_anchor_id(self.position_anchors[eid][offset][0])

    def register_link_id(self, eid, kind):
        return self.register_anchor("%s_%s" % (kind, eid), (eid, 0))

    def get_anchor_id(self, anchor_name):
        if anchor_name not in self.anchor_id:
            self.anchor_id[anchor_name] = new_id = make_unique_name(
                self.fix_html_id(anchor_name), self.anchor_ids
            )
            self.anchor_ids.add(new_id)

        return self.anchor_id[anchor_name]

    def process_position(self, eid, offset, elem):
        if self.DEBUG:
            log.debug("process position %s" % self.position_str((eid, offset)))

        if eid in self.position_anchors:
            if offset in self.position_anchors[eid]:
                if self.DEBUG:
                    log.debug("at registered position")

                if not elem.get("id", ""):
                    elem_id = self.get_anchor_id(self.position_anchors[eid][offset][0])
                    elem.set("id", elem_id)
                    if self.DEBUG:
                        log.debug(
                            "set element id %s for position %s"
                            % (elem_id, self.position_str((eid, offset)))
                        )

                anchor_names = self.position_anchors[eid].pop(offset)
                for anchor_name in anchor_names:
                    self.anchor_elem[anchor_name] = elem

                if len(self.position_anchors[eid]) == 0:
                    self.position_anchors.pop(eid)

                return anchor_names

        return []
    def move_anchor(self, old_elem, new_elem):
        for anchor_name, elem in self.anchor_elem.items():
            if elem is old_elem:
                self.anchor_elem[anchor_name] = new_elem

        if "id" in old_elem.attrib:
            new_elem.set("id", old_elem.attrib.pop("id"))

    def move_anchors(self, old_root, target_elem):
        for anchor_name, elem in self.anchor_elem.items():
            if root_element(elem) is old_root:
                self.anchor_elem[anchor_name] = target_elem

        if "id" in old_root.attrib and "id" not in target_elem.attrib:
            target_elem.set("id", old_root.get("id"))

    def get_anchor_uri(self, anchor_name):
        self.used_anchors.add(anchor_name)

        if anchor_name in self.anchor_uri:
            return self.anchor_uri[anchor_name]

        positions = self.anchor_positions.get(anchor_name, [])
        log.error(
            "Failed to locate uri for anchor: %s (position: %s)"
            % (
                anchor_name,
                ", ".join([self.position_str(p) for p in sorted(positions)]),
            )
        )
        return "/MISSING_ANCHOR#" + anchor_name

    def report_duplicate_anchors(self):
        for anchor_name, positions in self.anchor_positions.items():
            if (anchor_name in self.used_anchors) and (len(positions) > 1):
                log.error(
                    "Anchor %s has multiple positions: %s"
                    % (
                        anchor_name,
                        ", ".join([self.position_str(p) for p in sorted(positions)]),
                    )
                )

    def anchor_as_uri(self, anchor):
        return "anchor:" + anchor

    def anchor_from_uri(self, uri):
        return uri[7:]

    def id_of_anchor(self, anchor, filename):
        url = self.get_anchor_uri(anchor)
        purl = urllib.parse.urlparse(url)

        if purl.path != filename or not purl.fragment:
            log.error("anchor %s in file %s links to %s" % (anchor, filename, url))

        return purl.fragment
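
    # Final fixup pass: every anchor is resolved to "file#id" once the book
    # parts are split, ids that have no visible content before them are
    # collapsed to a bare file reference, and the provisional "anchor:" hrefs
    # written during conversion are rewritten as relative URLs.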
    def fixup_anchors_and_hrefs(self):
        for anchor_name, elem in self.anchor_elem.items():
            root = root_element(elem)

            for book_part in self.epub.book_parts:
                if book_part.html is root:
                    elem_id = elem.get("id", "")
                    if not elem_id:
                        elem_id = self.get_anchor_id(str(anchor_name))
                        elem.set("id", elem_id)

                    self.anchor_uri[anchor_name] = "%s#%s" % (
                        urllib.parse.quote(book_part.filename),
                        elem_id,
                    )
                    break
            else:
                log.error(
                    "Failed to locate element within book parts for anchor %s"
                    % anchor_name
                )

        self.anchor_elem = None

        for book_part in self.epub.book_parts:
            body = book_part.body()
            for e in body.iter("*"):
                if "id" in e.attrib and not visible_elements_before(e):
                    uri = book_part.filename + "#" + e.get("id")
                    if self.DEBUG:
                        log.debug("no visible element before %s" % uri)

                    for anchor, a_uri in self.anchor_uri.items():
                        if (a_uri == uri) and (anchor not in self.immovable_anchors):
                            self.anchor_uri[anchor] = urllib.parse.quote(
                                book_part.filename
                            )
                            if self.DEBUG:
                                log.debug(" moved anchor %s" % anchor)

        for book_part in self.epub.book_parts:
            body = book_part.body()
            for e in body.iter("*"):
                if e.tag == "a" and e.get("href", "").startswith("anchor:"):
                    e.set(
                        "href",
                        urlrelpath(
                            self.get_anchor_uri(
                                self.anchor_from_uri(e.attrib.pop("href"))
                            ),
                            ref_from=book_part.filename,
                        ),
                    )

        for g in self.epub.guide:
            g.target = self.get_anchor_uri(g.anchor)

        for p in self.epub.pagemap:
            p.target = self.get_anchor_uri(p.anchor)

        def resolve_toc_target(ncx_toc):
            for toc_entry in ncx_toc:
                if toc_entry.anchor:
                    toc_entry.target = self.get_anchor_uri(toc_entry.anchor)

                if toc_entry.children:
                    resolve_toc_target(toc_entry.children)

        resolve_toc_target(self.epub.ncx_toc)

def root_element(elem):
    while elem.getparent() is not None:
        elem = elem.getparent()

    return elem


def visible_elements_before(elem, root=None):
    if root is None:
        root = elem
        while root.tag != "body":
            root = root.getparent()

    if elem is root:
        return False

    for e in root.iterfind(".//*"):
        if e is elem:
            break

        if e.tag in ["img", "br", "hr", "li", "ol", "ul"] or e.text or e.tail:
            return True

    return False
3165
kindle_download_helper/third_party/kfxlib/yj_to_epub_properties.py
vendored
Normal file
File diff suppressed because it is too large
580
kindle_download_helper/third_party/kfxlib/yj_to_epub_resources.py
vendored
Normal file
@@ -0,0 +1,580 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import io
import posixpath
import re

from PIL import Image

from .message_logging import log
from .python_transition import IS_PYTHON2
from .utilities import (
    EXTS_OF_MIMETYPE,
    RESOURCE_TYPE_OF_EXT,
    convert_jxr_to_tiff,
    convert_pdf_to_jpeg,
    disable_debug_log,
    font_file_ext,
    image_file_ext,
    root_filename,
    urlrelpath,
)
from .yj_structure import SYMBOL_FORMATS

if IS_PYTHON2:
    from .python_transition import repr, urllib
else:
    import urllib.parse


__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


USE_HIGHEST_RESOLUTION_IMAGE_VARIANT = True
FIX_PDF = True
FIX_JPEG_XR = True
JXR_TO_JPEG_QUALITY = 90
MIN_JPEG_QUALITY = 80
MAX_JPEG_QUALITY = 100
TILE_SIZE_REPORT_PERCENTAGE = 10


class Obj(object):
    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
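

# Resource handling: get_external_resource() decodes a "$164" resource
# fragment into an Obj holding the raw media bytes, target filename,
# dimensions and referred resources, with results cached per resource name;
# process_external_resource() then writes the bytes into the EPUB manifest.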
class KFX_EPUB_Resources(object):
    def get_external_resource(self, resource_name, ignore_variants=False):
        resource_obj = self.resource_cache.get(resource_name)
        if resource_obj is not None:
            return resource_obj

        resource = self.get_fragment(ftype="$164", fid=resource_name)

        if resource.pop("$175", "") != resource_name:
            raise Exception("Name of resource %s is incorrect" % resource_name)

        format = resource.pop("$161", None)

        if format in SYMBOL_FORMATS:
            extension = "." + SYMBOL_FORMATS[format]
        else:
            if format is not None:
                log.error(
                    "Resource %s has unknown format: %s" % (resource_name, format)
                )
            extension = ".bin"

        fixed_height = resource.pop("$67", None)
        fixed_width = resource.pop("$66", None)

        resource_height = resource.pop("$423", None) or fixed_height
        resource_width = resource.pop("$422", None) or fixed_width
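
        # Tiled image: large images are shipped as a grid of tiles ("$636"),
        # each padded by tile_padding pixels along shared edges. The tiles
        # are cropped and pasted into one full-size canvas, which is then
        # re-encoded below.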
        if "$636" in resource:
            tile_height = resource.pop("$638")
            tile_width = resource.pop("$637")
            tile_padding = resource.pop("$797", 0)

            with disable_debug_log():
                full_image = Image.new("RGB", (resource_width, resource_height))
                separate_tiles_size = tile_count = 0

                col = resource.pop("$636")
                for y, row in enumerate(col):
                    top_padding = 0 if y == 0 else tile_padding
                    bottom_padding = (
                        resource_height - tile_height * len(col)
                        if y == len(col) - 1
                        else tile_padding
                    )

                    for x, location in enumerate(row):
                        left_padding = 0 if x == 0 else tile_padding
                        right_padding = (
                            resource_width - tile_width * len(row)
                            if x == len(row) - 1
                            else tile_padding
                        )

                        tile_raw_media = self.locate_raw_media(location)
                        if tile_raw_media is not None:
                            tile_count += 1
                            separate_tiles_size += len(tile_raw_media)
                            tile = Image.open(io.BytesIO(tile_raw_media))
                            twidth, theight = tile.size
                            if (
                                twidth != tile_width + left_padding + right_padding
                                or theight != tile_height + top_padding + bottom_padding
                            ):
                                log.error(
                                    "Resource %s tile %d, %d size (%d, %d) does not have expected padding %d of (%d, %d) for %s"
                                    % (
                                        resource_name,
                                        x,
                                        y,
                                        twidth,
                                        theight,
                                        tile_padding,
                                        tile_width,
                                        tile_height,
                                        resource_name,
                                    )
                                )

                            crop = (
                                left_padding,
                                top_padding,
                                tile_width + left_padding,
                                tile_height + top_padding,
                            )
                            tile = tile.crop(crop)
                            full_image.paste(tile, (x * tile_width, y * tile_height))
                            tile.close()

                if full_image.size != (resource_width, resource_height):
                    log.error(
                        "Resource %s combined tiled image size is (%d, %d) but should be (%d, %d) for %s"
                        % (
                            resource_name,
                            full_image.size[0],
                            full_image.size[1],
                            resource_width,
                            resource_height,
                            resource_name,
                        )
                    )
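
                # Binary search over JPEG quality: re-encode the stitched
                # image at the quality whose output size best matches the
                # combined size of the original tiles, as a rough proxy for
                # the source encoding quality.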
                min_quality = MIN_JPEG_QUALITY
                max_quality = MAX_JPEG_QUALITY
                best_size_diff = best_quality = raw_media = None
                while True:
                    quality = (max_quality + min_quality) // 2
                    outfile = io.BytesIO()
                    full_image.save(
                        outfile,
                        "jpeg" if extension == ".jpg" else extension[1:],
                        quality=quality,
                    )
                    test_raw_media = outfile.getvalue()
                    outfile.close()

                    size_diff = abs(separate_tiles_size - len(test_raw_media))
                    if best_size_diff is None or size_diff < best_size_diff:
                        best_size_diff = size_diff
                        best_quality = quality
                        raw_media = test_raw_media

                    if separate_tiles_size > len(test_raw_media):
                        min_quality = quality + 1
                    else:
                        max_quality = quality - 1

                    if max_quality < min_quality:
                        break

            if (
                best_size_diff * 100
            ) // separate_tiles_size > TILE_SIZE_REPORT_PERCENTAGE or True:
                log.warning(
                    "Image resource %s has %d tiles with total size %d combined into image of size %d quality %d"
                    % (
                        resource_name,
                        tile_count,
                        separate_tiles_size,
                        len(raw_media),
                        best_quality,
                    )
                )

            location = location.partition("-tile")[0]
        else:
            location = resource.pop("$165")
            search_path = resource.pop("$166", location)
            if search_path != location:
                log.error(
                    "Image resource %s has location %s != search_path %s"
                    % (resource_name, location, search_path)
                )

            raw_media = self.locate_raw_media(location)

        mime = resource.pop("$162", None)

        if mime in EXTS_OF_MIMETYPE:
            if extension == ".pobject" or extension == ".bin":
                if mime == "figure":
                    extension = image_file_ext(raw_media)
                else:
                    extension = EXTS_OF_MIMETYPE[mime][0]
        elif mime is not None:
            log.error(
                "Resource %s has unknown mime type: %s" % (resource_name, repr(mime))
            )

        location_fn = location

        location_fn = resource.pop(
            "yj.conversion.source_resource_filename", location_fn
        )
        location_fn = resource.pop("yj.authoring.source_file_name", location_fn)

        if (extension == ".pobject" or extension == ".bin") and "." in location_fn:
            extension = "." + location_fn.rpartition(".")[2]

        if not location_fn.endswith(extension):
            location_fn = location_fn.partition(".")[0] + extension

        resource.pop("$597", None)
        resource.pop("$57", None)
        resource.pop("$56", None)
        resource.pop("$499", None)
        resource.pop("$500", None)
        resource.pop("$137", None)
        resource.pop("$136", None)
        referred_resources = resource.pop("$167", [])

        if "$214" in resource:
            self.process_external_resource(resource.pop("$214"), save=False)
        if FIX_JPEG_XR and (format == "$548") and (raw_media is not None):
            try:
                tiff_data = convert_jxr_to_tiff(raw_media, location_fn)
            except Exception as e:
                log.error(
                    "Exception during conversion of JPEG-XR '%s' to TIFF: %s"
                    % (location_fn, repr(e))
                )
            else:
                with disable_debug_log():
                    img = Image.open(io.BytesIO(tiff_data))
                    ofmt, extension = (
                        ("PNG", ".png") if img.mode == "RGBA" else ("JPEG", ".jpg")
                    )
                    outfile = io.BytesIO()
                    img.save(outfile, ofmt, quality=JXR_TO_JPEG_QUALITY)
                    img.close()

                raw_media = outfile.getvalue()
                outfile.close()
                location_fn = location_fn.rpartition(".")[0] + extension

        suffix = ""
        if (
            FIX_PDF
            and format == "$565"
            and raw_media is not None
            and "$564" in resource
        ):
            page_num = resource["$564"] + 1
            try:
                jpeg_data = convert_pdf_to_jpeg(
                    raw_media, page_num, reported_errors=self.reported_pdf_errors
                )
            except Exception as e:
                log.error(
                    'Exception during conversion of PDF "%s" page %d to JPEG: %s'
                    % (location_fn, page_num, repr(e))
                )
            else:
                raw_media = jpeg_data
                extension = ".jpg"
                location_fn = location_fn.rpartition(".")[0] + extension
                suffix = "-page%d" % page_num
                resource.pop("$564")
        filename = self.resource_location_filename(
            location_fn, suffix, self.epub.IMAGE_FILEPATH
        )

        if not ignore_variants:
            for rr in resource.pop("$635", []):
                variant = self.get_external_resource(rr, ignore_variants=True)

                if (
                    USE_HIGHEST_RESOLUTION_IMAGE_VARIANT
                    and variant is not None
                    and variant.width > resource_width
                    and variant.height > resource_height
                ):
                    if self.DEBUG:
                        log.info(
                            "Replacing image %s (%dx%d) with variant %s (%dx%d)"
                            % (
                                filename,
                                resource_width,
                                resource_height,
                                variant.filename,
                                variant.width,
                                variant.height,
                            )
                        )

                    raw_media, filename, resource_width, resource_height = (
                        variant.raw_media,
                        variant.filename,
                        variant.width,
                        variant.height,
                    )

        if "$564" in resource:
            filename += "#page=%d" % (resource.pop("$564") + 1)

        self.check_empty(resource, "resource %s" % resource_name)

        resource_obj = self.resource_cache[resource_name] = Obj(
            raw_media=raw_media,
            filename=filename,
            extension=extension,
            format=format,
            mime=mime,
            location=location,
            width=resource_width,
            height=resource_height,
            referred_resources=referred_resources,
            manifest_entry=None,
        )

        return resource_obj
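
    # Saving a resource: the raw media is added to the EPUB under its
    # computed filename, reusing an existing manifest entry when the bytes
    # are identical and appending a numeric suffix when a different file
    # already owns the name. Referred resources (e.g. plugin assets) are
    # processed recursively.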
    def process_external_resource(
        self,
        resource_name,
        save=True,
        process_referred=False,
        save_referred=False,
        is_plugin=False,
        is_referred=False,
    ):
        resource_obj = self.get_external_resource(resource_name)

        if (
            save
            and self.save_resources
            and resource_obj.raw_media is not None
            and resource_obj.manifest_entry is None
        ):
            filename = (
                root_filename(resource_obj.location)
                if is_referred
                else resource_obj.filename
            )
            filename, fragment_sep, fragment = filename.partition("#")
            base_filename = filename
            cnt = 0
            while filename in self.epub.oebps_files:
                if (
                    self.epub.oebps_files[filename].binary_data
                    == resource_obj.raw_media
                ):
                    resource_obj.manifest_entry = self.epub.manifest_files[filename]
                    break

                if is_referred and cnt == 0:
                    log.error(
                        "Multiple referred resources exist with location %s"
                        % resource_obj.location
                    )

                fn, ext = posixpath.splitext(base_filename)
                filename = "%s_%d%s" % (fn, cnt, ext)
                cnt += 1
            else:
                resource_obj.manifest_entry = self.epub.manifest_resource(
                    filename,
                    data=resource_obj.raw_media,
                    height=resource_obj.height,
                    width=resource_obj.width,
                    mimetype=resource_obj.mime if is_referred else None,
                )

            resource_obj.filename = filename + fragment_sep + fragment
            resource_obj.is_saved = True

        if process_referred or save_referred:
            for rr in resource_obj.referred_resources:
                self.process_external_resource(rr, save=save_referred, is_referred=True)

        if is_referred:
            pass
        elif is_plugin and resource_obj.format not in ["$287", "$284"]:
            log.error(
                "Unexpected plugin resource format %s for %s"
                % (resource_obj.format, resource_name)
            )
        elif (not is_plugin) and resource_obj.extension == ".pobject":
            log.error(
                "Unexpected non-plugin resource format %s for %s"
                % (resource_obj.extension, resource_name)
            )

        return resource_obj
    def locate_raw_media(self, location, report_missing=True):
        try:
            raw_media = self.book_data["$417"][location]
            self.used_raw_media.add(location)
        except Exception:
            if report_missing:
                log.error("Missing bcRawMedia %s" % location)

            raw_media = None

        return raw_media
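
    # Turns an internal resource location into a safe, unique OEBPS filename:
    # unsafe characters are replaced, known path prefixes are stripped, and a
    # numeric counter is appended on case-insensitive collisions. Results are
    # memoized per (location, suffix).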
    def resource_location_filename(self, location, suffix, filepath_template):
        if (location, suffix) in self.location_filenames:
            return self.location_filenames[(location, suffix)]

        if location.startswith("/"):
            location = "_" + location[1:]

        safe_location = re.sub(r"[^A-Za-z0-9_/.-]", "_", location)
        safe_location = safe_location.replace("//", "/x/")

        path, sep, name = safe_location.rpartition("/")
        path += sep

        root, sep, ext = name.rpartition(".")
        ext = sep + ext
        resource_type = RESOURCE_TYPE_OF_EXT.get(ext, "resource")

        unique_part = self.unique_part_of_local_symbol(root)
        root = self.prefix_unique_part_of_symbol(unique_part, resource_type)

        for prefix in ["resource/", filepath_template[1:].partition("/")[0] + "/"]:
            if path.startswith(prefix):
                path = path[len(prefix) :]

        safe_filename = filepath_template % ("%s%s%s%s" % (path, root, suffix, ext))

        unique_count = 0
        oebps_files_lower = set([n.lower() for n in self.epub.oebps_files.keys()])

        while safe_filename.lower() in oebps_files_lower:
            safe_filename = filepath_template % (
                "%s%s%s-%d%s" % (path, root, suffix, unique_count, ext)
            )
            unique_count += 1

        self.location_filenames[(location, suffix)] = safe_filename
        return safe_filename

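resource_location_filename reduces arbitrary KFX resource locations to names that are safe inside the EPUB container. The character-filtering step in isolation (an illustrative sketch, independent of the class state):

import re

def safe_location(location):
    if location.startswith("/"):
        location = "_" + location[1:]
    safe = re.sub(r"[^A-Za-z0-9_/.-]", "_", location)
    return safe.replace("//", "/x/")

assert safe_location("/images/cover page.jpg") == "_images/cover_page.jpg"
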
    def process_fonts(self):
        fonts = self.book_data.pop("$262", {})
        raw_fonts = self.book_data.pop("$418", {})
        raw_media = self.book_data.get("$417", {})
        used_fonts = {}

        for font in fonts.values():
            location = font.pop("$165")

            if location in used_fonts:
                font["src"] = 'url("%s")' % urllib.parse.quote(
                    urlrelpath(
                        used_fonts[location], ref_from=self.epub.STYLES_CSS_FILEPATH
                    )
                )
            elif location in raw_fonts or (
                self.book.is_kpf_prepub and location in raw_media
            ):
                raw_font = raw_fonts.pop(location, None) or raw_media.pop(location)

                filename = location
                if "." not in filename:
                    ext = font_file_ext(raw_font)
                    if not ext:
                        log.error(
                            "Font %s has unknown type (possibly obfuscated)" % filename
                        )
                        ext = ".font"

                    filename = "%s%s" % (filename, ext)

                filename = self.resource_location_filename(
                    filename, "", self.epub.FONT_FILEPATH
                )

                if filename not in self.epub.oebps_files:
                    self.epub.manifest_resource(filename, data=raw_font)

                font["src"] = 'url("%s")' % urlrelpath(
                    urllib.parse.quote(filename), ref_from=self.epub.STYLES_CSS_FILEPATH
                )
                used_fonts[location] = filename
            else:
                log.error("Missing bcRawFont %s" % location)

            for prop in ["$15", "$12", "$13"]:
                if prop in font and font[prop] == "$350":
                    font.pop(prop)

            self.fix_font_name(font["$11"], add=True)
            self.font_faces.append(self.convert_yj_properties(font))

        for location in raw_fonts:
            log.warning("Unused font file: %s" % location)
            filename = self.resource_location_filename(
                location, "", self.epub.FONT_FILEPATH
            )
            self.epub.manifest_resource(filename, data=raw_fonts[location])

    def uri_reference(
        self, uri, save=True, save_referred=None, manifest_external_refs=False
    ):
        purl = urllib.parse.urlparse(uri)

        if purl.scheme == "kfx":
            return self.process_external_resource(
                urllib.parse.unquote(purl.netloc + purl.path),
                is_plugin=None,
                save=save,
                save_referred=save_referred,
            ).filename

        if purl.scheme in ["navto", "navt"]:
            anchor = self.navto_anchor.get(
                (
                    urllib.parse.unquote(purl.netloc),
                    float(purl.fragment) if purl.fragment else 0.0,
                )
            )
            if anchor is not None:
                return self.anchor_as_uri(anchor)
            else:
                log.error("Failed to locate anchor for %s" % uri)
                return "/MISSING_NAVTO#%s_%s" % (
                    urllib.parse.unquote(purl.netloc),
                    purl.fragment,
                )

        if purl.scheme in ["http", "https"]:
            if manifest_external_refs:
                self.epub.manifest_resource(uri, external=True, report_dupe=False)

            return uri

        if purl.scheme != "mailto":
            log.error("Unexpected URI scheme: %s" % uri)

        return uri

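uri_reference dispatches on the URI scheme; for kfx: links the resource name is carried in the netloc plus path. How urllib.parse splits such a URI (a small demonstration, not project code):

import urllib.parse

purl = urllib.parse.urlparse("kfx://resource%2Fcover")
assert purl.scheme == "kfx"
assert urllib.parse.unquote(purl.netloc + purl.path) == "resource/cover"
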
    def unique_file_id(self, filename):
        if filename in self.file_ids:
            return self.file_ids[filename]

        id = re.sub(r"[^A-Za-z0-9.-]", "_", filename.rpartition("/")[2][:64])

        if not re.match(r"^[A-Za-z]", id[0]):
            id = "id_" + id

        if id in self.file_ids.values():
            base_id = id
            unique_count = 0
            while id in self.file_ids.values():
                id = "%s_%d" % (base_id, unique_count)
                unique_count += 1

        self.file_ids[filename] = id
        return id

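unique_file_id derives XML-safe manifest ids: anything outside letters, digits, dot, and hyphen becomes an underscore, and an id that would start with a non-letter gains an "id_" prefix. Worked through by hand on a sample name:

import re

fid = re.sub(r"[^A-Za-z0-9.-]", "_", "01 cover.jpg")
if not re.match(r"^[A-Za-z]", fid[0]):
    fid = "id_" + fid
assert fid == "id_01_cover.jpg"
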
288
kindle_download_helper/third_party/kfxlib/yj_to_pdf.py
vendored
Normal file
@@ -0,0 +1,288 @@
from __future__ import absolute_import, division, print_function, unicode_literals

import io

from PIL import Image

try:
    import PyPDF2
except ImportError:
    try:
        from . import PyPDF2
    except ImportError:
        PyPDF2 = None


from .ion import (
    IonAnnotation,
    IonList,
    IonSExp,
    IonString,
    IonStruct,
    IonSymbol,
    ion_type,
)
from .message_logging import log
from .utilities import convert_jxr_to_tiff, disable_debug_log, list_symbols
from .yj_container import YJFragmentKey

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


class ImageResource(object):
    def __init__(self, location, image_format, data):
        self.location = location
        self.image_format = image_format
        self.data = data


class KFX_PDF(object):
    def __init__(self, book):
        self.book = book

    def extract_pdf_resources(self):
        ordered_pdfs = self.get_ordered_images(["$565"])

        if len(ordered_pdfs) == 0:
            pdf_data = None
        elif len(ordered_pdfs) == 1:
            pdf_data = ordered_pdfs[0].data
        elif PyPDF2 is None:
            log.error("PyPDF2 package is missing. Unable to combine PDF resources")
            pdf_data = None
        else:
            try:
                merger = PyPDF2.PdfFileMerger()
                for single_pdf in ordered_pdfs:
                    merger.append(fileobj=io.BytesIO(single_pdf.data))

                merged_file = io.BytesIO()
                merger.write(merged_file)
                pdf_data = merged_file.getvalue()
                merged_file.close()
            except Exception as e:
                log.error(repr(e))
                pdf_data = None

            if pdf_data is not None:
                log.info(
                    "Combined %d PDF resources into a single file" % len(ordered_pdfs)
                )

        return pdf_data

    def convert_image_resources(self):
        ordered_images = self.get_ordered_images(
            ["$286", "$285", "$548", "$284"],
            include_unreferenced=False,
            allow_duplicates=True,
        )
        return convert_images_to_pdf_data(ordered_images)

    def get_ordered_images(
        self, formats, include_unreferenced=True, allow_duplicates=False
    ):
        image_resource_location = {}
        image_resources = {}

        for fragment in self.book.fragments.get_all("$164"):
            resource = fragment.value
            resource_format = resource.get("$161")
            if resource_format in formats:
                location = resource.get("$165")
                if location is not None and location not in image_resources:
                    raw_media = self.book.fragments.get(ftype="$417", fid=location)
                    if raw_media is not None:
                        image_resource_location[fragment.fid] = location
                        image_resources[location] = ImageResource(
                            location, resource_format, raw_media.value
                        )

        ordered_images = []
        unused_image_resource_ids = set(image_resources.keys())

        for fid in self.collect_image_references(allow_duplicates):
            location = image_resource_location.get(fid)
            image_resource = image_resources.get(location)
            if image_resource is not None:
                ordered_images.append(image_resource)
                unused_image_resource_ids.discard(location)

        if unused_image_resource_ids and include_unreferenced:
            log.error(
                "Found unreferenced resources: %s"
                % list_symbols(unused_image_resource_ids)
            )
            for fid in unused_image_resource_ids:
                ordered_images.append(image_resources[fid])

        return ordered_images

    def collect_image_references(self, allow_duplicates=False):
        processed_story_names = set()
        ordered_image_resources = []

        def collect_section_info(section_name):
            pending_story_names = []
            section_image_resources = set()

            def walk_content(data, content_key):
                data_type = ion_type(data)

                if data_type is IonAnnotation:
                    walk_content(data.value, content_key)

                elif data_type is IonList:
                    for i, fc in enumerate(data):
                        if (
                            content_key in {"$146", "$274"}
                            and self.book.is_kpf_prepub
                            and ion_type(fc) is IonSymbol
                        ):
                            fc = self.book.fragments[
                                YJFragmentKey(ftype="$608", fid=fc)
                            ]

                        walk_content(fc, content_key)

                elif data_type is IonSExp:
                    for fc in data:
                        walk_content(fc, content_key)

                elif data_type is IonStruct:
                    annot_type = data.get("$687")
                    typ = data.get("$159")

                    if typ == "$271":
                        resource_name = data.get("$175")
                        if (
                            resource_name is not None
                            and resource_name not in section_image_resources
                        ):
                            section_image_resources.add(resource_name)

                            if (
                                allow_duplicates
                                or resource_name not in ordered_image_resources
                            ):
                                ordered_image_resources.append(resource_name)

                    if "$141" in data:
                        for pt in data["$141"]:
                            if isinstance(pt, IonAnnotation):
                                pt = pt.value

                            walk_content(pt, "$141")

                    if "$683" in data:
                        walk_content(data["$683"], "$683")

                    if "$749" in data:
                        walk_content(
                            self.book.fragments[
                                YJFragmentKey(ftype="$259", fid=data["$749"])
                            ],
                            "$259",
                        )

                    if "$146" in data:
                        walk_content(data["$146"], "$274" if typ == "$274" else "$146")

                    if "$145" in data and annot_type not in ["$584", "$690"]:
                        fv = data["$145"]
                        if ion_type(fv) is not IonStruct:
                            walk_content(fv, "$145")

                    if "$176" in data and content_key != "$259":
                        fv = data["$176"]

                        if self.book.is_conditional_structure:
                            if fv not in pending_story_names:
                                pending_story_names.append(fv)
                        else:
                            if fv not in processed_story_names:
                                walk_content(
                                    self.book.fragments[
                                        YJFragmentKey(ftype="$259", fid=fv)
                                    ],
                                    "$259",
                                )
                                processed_story_names.add(fv)

                    for fk, fv in data.items():
                        if ion_type(fv) != IonString and fk not in {
                            "$749",
                            "$584",
                            "$683",
                            "$145",
                            "$146",
                            "$141",
                            "$702",
                            "$250",
                            "$176",
                            "yj.dictionary.term",
                            "yj.dictionary.unnormalized_term",
                        }:
                            walk_content(fv, fk)

            walk_content(
                self.book.fragments[YJFragmentKey(ftype="$260", fid=section_name)],
                "$260",
            )

            for story_name in pending_story_names:
                if story_name not in processed_story_names:
                    walk_content(
                        self.book.fragments[
                            YJFragmentKey(ftype="$259", fid=story_name)
                        ],
                        "$259",
                    )
                    processed_story_names.add(story_name)

        for section_name in self.book.ordered_section_names():
            collect_section_info(section_name)

        return ordered_image_resources


def convert_images_to_pdf_data(ordered_images):
    if len(ordered_images) == 0:
        pdf_data = None
    else:
        image_list = []
        for image_resource in ordered_images:
            image_data = image_resource.data

            if image_resource.image_format == "$548":
                try:
                    image_data = convert_jxr_to_tiff(
                        image_data, image_resource.location
                    )
                except Exception as e:
                    log.error(
                        "Exception during conversion of JPEG-XR '%s' to TIFF: %s"
                        % (image_resource.location, repr(e))
                    )

            with disable_debug_log():
                image = Image.open(io.BytesIO(image_data))
                image = image.convert("RGB")
                image_list.append(image)

        first_image = image_list.pop(0)
        pdf_file = io.BytesIO()

        with disable_debug_log():
            first_image.save(pdf_file, "pdf", save_all=True, append_images=image_list)

        for image in image_list:
            image.close()

        first_image.close()

        pdf_data = pdf_file.getvalue()
        pdf_file.close()

    return pdf_data

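convert_images_to_pdf_data relies on Pillow's multi-page PDF writer: the first image is saved with save_all=True and the remaining pages passed as append_images. That call can be exercised on its own (a standalone sketch with generated images, not kfxlib code):

import io

from PIL import Image

pages = [Image.new("RGB", (200, 300), color) for color in ("white", "gray")]
buffer = io.BytesIO()
pages[0].save(buffer, "pdf", save_all=True, append_images=pages[1:])
pdf_data = buffer.getvalue()  # bytes of a two-page PDF
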
676
kindle_download_helper/third_party/kfxlib/yj_versions.py
vendored
Normal file
@@ -0,0 +1,676 @@
#!/usr/bin/python
# -*- coding: utf8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = "GPL v3"
__copyright__ = "2016-2022, John Howell <jhowell@acm.org>"


ANY = None
TF = {False, True}


PACKAGE_VERSION_PLACEHOLDERS = {
    "PackageVersion:YJReaderSDK-1.0.x.x GitSHA:c805492 Month-Day:04-22",
    "PackageVersion:YJReaderSDK-1.0.x.x GitSHA:[33mc805492[m Month-Day:04-22",
    "kfxlib-00000000",
}


KNOWN_KFX_GENERATORS = {
    ("2.16", "PackageVersion:YJReaderSDK-1.0.824.0 Month-Day:04-09"),
    ("3.41.1.0", "PackageVersion:YJReaderSDK-1.0.1962.11 Month-Day:10-17"),
    ("3.42.1.0", "PackageVersion:YJReaderSDK-1.0.2044.4 Month-Day:10-28"),
    ("6.11.1.2", "PackageVersion:YJReaderSDK-1.0.2467.43 Month-Day:07-05"),
    ("6.11.1.2", "PackageVersion:YJReaderSDK-1.0.2467.8 Month-Day:07-14"),
    ("6.11.1.2", "PackageVersion:YJReaderSDK-1.0.2539.3 Month-Day:03-17"),
    ("6.20.1.0", "PackageVersion:YJReaderSDK-1.0.2685.4 Month-Day:05-19"),
    ("6.24.1.0", "PackageVersion:YJReaderSDK-1.1.67.2 Month-Day:06-18"),
    ("6.28.1.0", "PackageVersion:YJReaderSDK-1.1.67.4 Month-Day:07-14"),
    ("6.28.2.0", "PackageVersion:YJReaderSDK-1.1.147.0 Month-Day:09-10"),
    ("7.38.1.0", "PackageVersion:YJReaderSDK-1.2.173.0 Month-Day:09-20"),
    ("7.45.1.0", "PackageVersion:YJReaderSDK-1.4.23.0 Month-Day:11-23"),
    ("7.58.1.0", "PackageVersion:YJReaderSDK-1.5.116.0 Month-Day:02-25"),
    ("7.66.1.0", "PackageVersion:YJReaderSDK-1.5.185.0 Month-Day:04-13"),
    ("7.66.1.0", "PackageVersion:YJReaderSDK-1.5.195.0 Month-Day:04-20"),
    ("7.91.1.0", "PackageVersion:YJReaderSDK-1.5.566.6 Month-Day:11-03"),
    ("7.91.1.0", "PackageVersion:YJReaderSDK-1.5.595.1 Month-Day:11-30"),
    ("7.111.1.1", "PackageVersion:YJReaderSDK-1.6.444.0 Month-Day:02-27"),
    ("7.111.1.1", "PackageVersion:YJReaderSDK-1.6.444.5 Month-Day:03-20"),
    ("7.121.3.0", "PackageVersion:YJReaderSDK-1.6.444.18 Month-Day:05-02"),
    ("7.125.1.0", "PackageVersion:YJReaderSDK-1.6.444.24 Month-Day:06-01"),
    ("7.125.1.0", "PackageVersion:YJReaderSDK-1.6.444.33 Month-Day:06-16"),
    ("7.131.2.0", "PackageVersion:YJReaderSDK-1.6.444.36 Month-Day:07-10"),
    ("7.135.2.0", "PackageVersion:YJReaderSDK-1.6.1034.2 Month-Day:08-23"),
    ("7.135.2.0", "PackageVersion:YJReaderSDK-1.6.1034.13 Month-Day:10-09"),
    ("7.135.2.0", "PackageVersion:YJReaderSDK-1.6.1034.17 Month-Day:11-06"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.1034.59 Month-Day:12-06"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.1034.62 Month-Day:12-21"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.1034.72 Month-Day:01-04"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.1871.0 Month-Day:01-23"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.1938.0 Month-Day:01-29"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.2071.0 Month-Day:02-12"),
    ("7.149.1.0", "PackageVersion:YJReaderSDK-1.6.200363.0 Month-Day:03-19"),
    ("7.153.1.0", ""),
    ("7.165.1.1", ""),
    ("7.168.1.0", ""),
    ("7.171.1.0", ""),
    ("7.174.1.0", ""),
    ("7.177.1.0", ""),
    ("7.180.1.0", ""),
    ("7.182.1.0", ""),
    ("7.188.1.0", ""),
    ("7.191.1.0", ""),
    ("7.213.1.0", ""),
    ("7.220.2.0", ""),
    ("7.228.1.0", ""),
    ("7.232.1.0", ""),
    ("7.236.1.0", ""),
    ("20.12.238.0", ""),
}


GENERIC_CREATOR_VERSIONS = {
    ("YJConversionTools", "2.15.0"),
    ("KTC", "1.0.11.1"),
    ("", ""),
}


KNOWN_FEATURES = {
    "symbols": {
        "max_id": {
            489, 609, 620, 626, 627, 634, 652, 662, 667, 668, 673, 681, 693,
            695, 696, 697, 700, 701, 705, 716, 748, 753, 754, 755, 759, 761,
            777, 779, 783, 785, 786, 787, 789, 797, 804, 825,
        },
    },
    "format_capabilities": {
        "kfxgen.pidMapWithOffset": {1},
        "kfxgen.positionMaps": {2},
        "kfxgen.textBlock": {1},
        "db.schema": {1},
    },
    "SDK.Marker": {
        "CanonicalFormat": {1, 2},
    },
    "com.amazon.yjconversion": {
        "ar-reflow-language": {1},
        "cn-reflow-language": {1},
        "indic-reflow-language": {1},
        "jp-reflow-language": {1},
        "jpvertical-reflow-language": {2, 3, 4, 5, 6, 7},
        "reflow-language": {2, 3},
        "reflow-language-expansion": {1},
        "tcn-reflow-language": {1},
        "multiple_reading_orders-switchable": {1},
        "reflow-section-size": ANY,
        "reflow-style": {
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
            (2147483646, 2147483647),
            (2147483647, 2147483647),
        },
        "yj_audio": {1, 2},
        "yj_custom_word_iterator": {1},
        "yj_dictionary": {1, 2},
        "yj_double_page_spread": {1},
        "yj_facing_page": {1},
        "yj_fixed_layout": {1},
        "yj_graphical_highlights": {1},
        "yj_hdv": {1, 2},
        "yj_interactive_image": {1},
        "yj_jpegxr_sd": {1},
        "yj_jpg_rst_marker_present": {1},
        "yj_mathml": {1},
        "yj_mixed_writing_mode": {1, 2},
        "yj_non_pdf_fixed_layout": {2},
        "yj_pdf_links": {1},
        "yj_pdf_support": {1},
        "yj_publisher_panels": {2},
        "yj_rotated_pages": {1},
        "yj_ruby": {1},
        "yj_table": {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
        "yj_table_viewer": {1, 2},
        "yj_textbook": {1},
        "yj_thumbnails_present": {1},
        "yj_vertical_text_shadow": {1},
        "yj_video": {1},
        "yj.conditional_structure": {1},
        "yj.illustrated_layout": {1},
    },
}


KNOWN_SUPPORTED_FEATURES = {
    ("$660",),
    ("$751",),
    ("$664", "crop_bleed", 1),
}


KNOWN_METADATA = {
    "book_navigation": {
        "pages": ANY,
    },
    "kindle_audit_metadata": {
        "file_creator": {"YJConversionTools", "FLYP", "KTC", "KC", "KPR"},
        "creator_version": {
            "2.15.0", "0.1.24.0", "0.1.26.0", "2.0.0.1", "1.0.11.1", "1.3.0.0",
            "1.5.14.0", "1.8.1.0", "1.9.2.0", "1.11.399.0", "1.11.539.0",
            "1.12.11.0", "1.13.7.0", "1.13.10.0", "0.93.187.0", "0.94.32.0",
            "0.95.8.0", "0.96.4.0", "0.96.40.0", "0.97.79.3", "0.98.260.0",
            "0.98.315.0", "0.99.28.0", "0.101.1.0", "0.102.0.0", "0.103.0.0",
            "1.0.319.0", "1.1.58.0", "1.2.83.0", "1.3.30.0", "1.4.200067.0",
            "1.5.60.0", "1.6.97.0", "1.7.223.0", "1.8.50.0", "1.9.52.0",
            "1.10.214.0", "1.11.576.0", "1.12.39.0", "1.14.112.0", "1.15.20.0",
            "1.16.2.0", "1.18.0.0", "1.20.1.0", "1.21.6.0", "1.22.13.0",
            "1.23.0.0", "1.24.33.0", "1.25.34.0", "1.26.14.0", "1.27.14.0",
            "1.28.12.0", "1.29.17.0", "1.30.4.0", "1.31.0.0", "1.32.1.0",
            "1.33.3.0", "1.34.20.0", "1.35.210.0", "1.35.618.0", "1.35.770.0",
            "1.36.1.0", "1.36.20.0", "1.37.2.0", "1.38.0.0", "1.38.37.0",
            "1.39.30.0", "1.40.6.0", "1.41.10.0", "1.42.2.0", "1.42.6.0",
            "1.43.0.0", "1.44.13.0", "1.45.20.0", "1.46.2.0", "1.47.1.0",
            "1.48.7.0", "1.49.0.0", "1.50.0.0", "1.51.1.0", "1.52.2.0",
            "1.52.4.0", "1.52.6.0", "1.53.1.0", "1.54.0.0", "1.55.0.0",
            "1.56.0.0", "1.57.0.0", "1.58.0.0", "1.59.0.0", "1.60.0.0",
            "1.60.1.0", "1.60.2.0", "1.61.0.0", "1.62.0.0", "1.62.1.0",
            "1.63.0.0", "3.0.0", "3.1.0", "3.2.0", "3.3.0", "3.4.0", "3.5.0",
            "3.6.0", "3.7.0", "3.7.1", "3.8.0", "3.9.0", "3.10.0", "3.10.1",
            "3.11.0", "3.12.0", "3.13.0", "3.14.0", "3.15.0", "3.16.0",
            "3.17.0", "3.17.1", "3.20.0", "3.20.1", "3.21.0", "3.22.0",
            "3.23.0", "3.24.0", "3.25.0", "3.26.0", "3.27.0", "3.28.0",
            "3.28.1", "3.29.0", "3.29.1", "3.29.2", "3.30.0", "3.31.0",
            "3.32.0", "3.33.0", "3.34.0", "3.35.0", "3.36.0", "3.36.1",
            "3.37.0", "3.38.0", "3.39.0", "3.39.1", "3.40.0", "3.41.0",
            "3.42.0", "3.43.0", "3.44.0", "3.45.0", "3.46.0", "3.47.0",
            "3.48.0", "3.49.0", "3.50.0", "3.51.0", "3.52.0", "3.52.1",
            "3.53.0", "3.54.0", "3.55.0", "3.56.0", "3.56.1", "3.57.0",
            "3.57.1", "3.58.0", "3.59.0", "3.59.1", "3.60.0", "3.61.0",
        },
    },
    "kindle_capability_metadata": {
        "continuous_popup_progression": {0},
        "graphical_highlights": {1},
        "yj_double_page_spread": {1},
        "yj_facing_page": {1},
        "yj_fixed_layout": {1},
        "yj_has_animations": {1},
        "yj_illustrated_layout": {1},
        "yj_publisher_panels": {1},
        "yj_textbook": {1},
    },
    "kindle_ebook_metadata": {
        "book_orientation_lock": {"landscape", "portrait", "none"},
        "multipage_selection": {"disabled"},
        "nested_span": {"enabled"},
        "selection": {"enabled"},
        "user_visible_labeling": {"page_exclusive"},
    },
    "kindle_title_metadata": {
        "cde_content_type": {"EBOK", "EBSP", "MAGZ", "PDOC"},
        "ASIN": ANY,
        "asset_id": ANY,
        "author": ANY,
        "author_pronunciation": ANY,
        "book_id": ANY,
        "content_id": ANY,
        "cover_image": ANY,
        "description": ANY,
        "dictionary_lookup": ANY,
        "editionVersion": ANY,
        "imprint_pronunciation": ANY,
        "is_dictionary": {True},
        "is_sample": TF,
        "issue_date": ANY,
        "itemType": {"MAGZ"},
        "language": ANY,
        "override_kindle_font": TF,
        "parent_asin": ANY,
        "periodicals_generation_V2": {"true"},
        "publisher": ANY,
        "title": ANY,
        "title_pronunciation": ANY,
        "updateTime": ANY,
    },
    "metadata": {
        "ASIN": ANY,
        "asset_id": ANY,
        "author": ANY,
        "binding_direction": {"binding_direction_left"},
        "cde_content_type": {"EBOK", "MAGZ", "PDOC"},
        "cover_image": ANY,
        "cover_page": ANY,
        "doc_sym_publication_id": ANY,
        "description": ANY,
        "issue_date": ANY,
        "language": ANY,
        "orientation": {"portrait", "landscape"},
        "parent_asin": ANY,
        "publisher": ANY,
        "reading_orders": ANY,
        "support_landscape": TF,
        "support_portrait": TF,
        "target_NarrowDimension": ANY,
        "target_WideDimension": ANY,
        "title": ANY,
        "version": {1.0},
        "volume_label": ANY,
    },
}


KNOWN_AUXILIARY_METADATA = {
    "ANCHOR_REFERRED_BY_CONTAINERS": ANY,
    "auxData_resource_list": ANY,
    "base_line": ANY,
    "button_type": {1},
    "checkbox_state": ANY,
    "dropDown_count": ANY,
    "filename.opf": ANY,
    "has_large_data_table": TF,
    "IsSymNameBased": TF,
    "IS_TARGET_SECTION": {True},
    "kSectionContainsAVI": {True},
    "links_extracted": {True},
    "link_from_text": TF,
    "location": ANY,
    "mime": {"Audio", "Figure", "Video"},
    "ModifiedContentInfo": ANY,
    "modified_time": ANY,
    "most-common-computed-style": ANY,
    "namespace": {"KindleConversion"},
    "num-dual-covers-removed": {1},
    "page_rotation": {0, 1},
    "plugin_group_list": ANY,
    "resizable_plugin": TF,
    "resource_stream": ANY,
    "size": ANY,
    "SourceIdContentInfo": ANY,
    "target": ANY,
    "text_baseline": ANY,
    "text_ext": {1},
    "type": {"resource"},
    "yj.dictionary.first_head_word": ANY,
    "yj.dictionary.inflection_rules": ANY,
}


KNOWN_KCB_DATA = {
    "book_state": {
        "book_input_type": [0, 1, 2, 3, 4, 6, 7],
        "book_reading_direction": [0, 2],
        "book_target_type": [1, 2, 3],
    },
    "content_hash": {},
    "metadata": {
        "book_path": ANY,
        "edited_tool_versions": KNOWN_METADATA["kindle_audit_metadata"][
            "creator_version"
        ],
        "format": ["yj"],
        "global_styling": TF,
        "id": ANY,
        "log_path": ANY,
        "platform": ["mac", "win"],
        "quality_report": ANY,
        "source_path": ANY,
        "tool_name": ["KC", "KPR", "KTC", "Kindle Previewer 3"],
        "tool_version": KNOWN_METADATA["kindle_audit_metadata"]["creator_version"],
    },
    "tool_data": {
        "cache_path": ANY,
        "created_on": ANY,
        "last_modified_time": ANY,
        "link_extract_choice": TF,
        "link_notification_preference": TF,
    },
}


def is_known_generator(kfxgen_application_version, kfxgen_package_version):
    if (
        kfxgen_application_version == ""
        or kfxgen_application_version.startswith("kfxlib")
        or kfxgen_application_version.startswith("KC")
        or kfxgen_application_version.startswith("KPR")
    ):
        return True

    if kfxgen_package_version in PACKAGE_VERSION_PLACEHOLDERS:
        kfxgen_package_version = ""

    return (kfxgen_application_version, kfxgen_package_version) in KNOWN_KFX_GENERATORS


def is_known_feature(cat, key, val):
    vals = KNOWN_FEATURES.get(cat, {}).get(key, [])
    return vals is ANY or val in vals


def is_known_metadata(cat, key, val):
    vals = KNOWN_METADATA.get(cat, {}).get(key, [])
    return vals is ANY or val in vals


def is_known_aux_metadata(key, val):
    vals = KNOWN_AUXILIARY_METADATA.get(key, [])
    return vals is ANY or val in vals


def is_known_kcb_data(cat, key, val):
    vals = KNOWN_KCB_DATA.get(cat, {}).get(key, [])
    return vals is ANY or val in vals

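All four predicates share one convention: a value set of ANY (None) accepts anything, otherwise plain membership is tested. For example, against the tables above:

assert is_known_feature("format_capabilities", "db.schema", 1)      # 1 is listed
assert not is_known_feature("format_capabilities", "db.schema", 2)  # 2 is not
assert is_known_metadata("kindle_title_metadata", "ASIN", "B000000000")  # ANY matches any placeholder value
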
@@ -1,4 +1,5 @@
import re

from kindle_download_helper.config import GITHUB_README_COMMENTS

105
no_cli.py
Normal file
@@ -0,0 +1,105 @@
import argparse
import os
import time

from kindle_download_helper.config import (
    DEFAULT_OUT_DEDRM_DIR,
    DEFAULT_OUT_DIR,
    DEFAULT_OUT_EPUB_DIR,
)
from kindle_download_helper.no_kindle import NoKindle


def no_main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-e",
        "--email",
        help="amazon login email",
    )
    parser.add_argument(
        "-p",
        "--password",
        help="amazon login password",
    )
    parser.add_argument(
        "--com",
        dest="domain",
        action="store_const",
        const="com",
        default="cn",
        help="if your account is an amazon.com account",
    )
    parser.add_argument(
        "--cn",
        dest="domain",
        action="store_const",
        const="cn",
        default="cn",
        help="if your account is an amazon.cn account",
    )
    parser.add_argument(
        "--jp",
        dest="domain",
        action="store_const",
        const="co.jp",
        default="cn",
        help="if your account is an amazon.co.jp account",
    )
    parser.add_argument(
        "--de",
        dest="domain",
        action="store_const",
        const="de",
        default="cn",
        help="if your account is an amazon.de account",
    )
    parser.add_argument(
        "--uk",
        dest="domain",
        action="store_const",
        const="uk",
        default="cn",
        help="if your account is an amazon.co.uk account",
    )
    parser.add_argument(
        "-o", "--outdir", default=DEFAULT_OUT_DIR, help="download output dir"
    )
    parser.add_argument(
        "-od",
        "--outdedrmdir",
        default=DEFAULT_OUT_DEDRM_DIR,
        help="download output dedrm dir",
    )
    parser.add_argument(
        "-oe",
        "--outepubmdir",
        default=DEFAULT_OUT_EPUB_DIR,
        help="download output epub dir",
    )
    options = parser.parse_args()
    if options.email is None or options.password is None:
        raise Exception("Please provide email and password")

    if not os.path.exists(options.outdir):
        os.makedirs(options.outdir)
    # for epub
    if not os.path.exists(options.outepubmdir):
        os.makedirs(options.outepubmdir)

    nk = NoKindle(options.email, options.password, options.domain)
    nk.make_library()
    for e in nk.ebooks:
        try:
            nk.download_book(e["ASIN"])
        except Exception as err:
            import traceback

            traceback.print_exc()
            print(err)
        # spider rule: pause between downloads to avoid being throttled
        time.sleep(1)


if __name__ == "__main__":
    no_main()

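With this entry point no physical Kindle device is needed; a typical invocation (placeholder credentials, assuming a run from the repository root):

python no_cli.py -e you@example.com -p your_password --com
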