diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 8705997..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2022 yihong, frostming and contributors - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md index 7fac828..61da482 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Kindle_download_helper -Download all your kindle books script. +Download all your kindle books and `DeDRM` script. image ## 安装 Kindle_download_helper @@ -17,11 +17,15 @@ Download all your kindle books script. - Mac 新手指南 by @chongiscool,见 [#76](https://github.com/yihong0618/Kindle_download_helper/issues/76) + ### Cli 安装使用 1. python3 2. requirements +or just pip +pip3 install kindle_download + ```python python3 --version #查看 python 版本 ``` @@ -36,6 +40,7 @@ pip3 install -r requirements.txt ```python python3 kindle.py --h #查看使用参数 +kindle_download --h # pip usage: kindle.py [-h] [--cookie COOKIE | --cookie-file COOKIE_FILE] [--cn] [--jp] [--de] [--resume-from INDEX] [--cut-length CUT_LENGTH] [-o OUTDIR] [-od OUTDEDRMDIR] [-s SESSION_FILE] [--pdoc] [--resolve_duplicate_names] @@ -66,7 +71,7 @@ options: --resolve_duplicate_names Resolve duplicate names files to download --readme If you want to generate kindle readme stats - --dedrm If you want to `dedrm` directly + --dedrm If you want to `DeDRM` directly --list just list books/pdoc, not to download ``` @@ -77,6 +82,8 @@ options: ```python python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM +or +kindle_download --dedrm --cn ``` (推荐) 手动输入 cookie、csrfToken 进行下载 @@ -84,6 +91,9 @@ python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM ```python python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍 +or +kindle_download ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM +kindle_download ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍 ``` ### 获取 cookie @@ -154,7 +164,7 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken} - cookie 和 csrf token 会过期,重新刷新下 amazon 的页面就行 - 程序会自动在命令执行的目录下创建 `DOWNLOADS` 目录,书会下载在 `DOWNLOADS` 里 -- 支持 mobi 类型的文件直接 dedrm `--dedrm` 生成的文件在 `DEDRMS` 里 +- 支持 mobi 类型的文件直接 DeDRM `--dedrm` 生成的文件在 `DEDRMS` 里 - 如果你用 [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) 解密 key 存在 key.txt 里 - 或者直接拖进 Calibre 里 please google it. - 如果过程中失败了可以使用 e.g. `--resume-from ${num}` @@ -169,7 +179,8 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken} - The cookie and csrf token will expire, just refresh the amazon page again. - The program will automatically create `DOWNLOADS` directory under the command execution directory, the book will be downloaded in `DOWNLOADS` directory. -- If you use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to decrypt the key, it will be stored in key.txt +- Support DeDRM with `--dedrm` +- or use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to decrypt the key, it will be stored in key.txt - or just drag it into Calibre. Please google it. - If the process fails you can use e.g. `--resume-from ${num}` - If the name is too long, you can add: `-cut-length 80` to truncate the file name diff --git a/icon_rc.py b/icon_rc.py index dba7aa8..79b9cb5 100644 --- a/icon_rc.py +++ b/icon_rc.py @@ -1816,7 +1816,7 @@ qt_resource_struct = b"\ \x00\x00\x00\x0e\x00\x02\x00\x00\x00\x01\x00\x00\x00\x03\ \x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00$\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\ -\x00\x00\x01\x82\x0f!\xe2\xc8\ +\x00\x00\x01\x82X\xe5\xc8\x87\ " def qInitResources(): diff --git a/kindle.py b/kindle.py index f31a796..e96ee43 100644 --- a/kindle.py +++ b/kindle.py @@ -1,691 +1,5 @@ -""" -Note some download code from: https://github.com/sghctoma/bOOkp -Great Thanks -""" - -import argparse -import atexit -import html -import json -import logging -import os -import pickle -import re -import time -import urllib -from http.cookies import SimpleCookie - -import requests -import urllib3 -from faker import Faker -from requests.adapters import HTTPAdapter - -from dedrm import MobiBook, get_pid_list - -try: - import browser_cookie3 -except ModuleNotFoundError: - print("not found browser_cookie3 here, you should use --cookie command") - -logger = logging.getLogger("kindle") -fh = logging.FileHandler(".error_books.log") -fh.setLevel(logging.ERROR) -logger.addHandler(fh) - -urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) - -DEFAULT_OUT_DIR = "DOWNLOADS" -DEFAULT_OUT_DEDRM_DIR = "DEDRMS" -DEFAULT_SESSION_FILE = ".kindle_session" - - -KINDLE_HEADER = { - "User-Agent": Faker().user_agent(), -} - -CONTENT_TYPES = { - "EBOK": "Ebook", - "PDOC": "KindlePDoc", -} - -KINDLE_URLS = { - "cn": { - "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll", - "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN", - "payload": "https://www.amazon.cn/hz/mycd/ajax", - "insights": "https://www.amazon.cn/kindle/reading/insights/data", - "book_url": "https://www.amazon.cn/dp/{book_id}", - }, - "jp": { - "bookall": "https://www.amazon.jp/hz/mycd/myx#/home/content/booksAll", - "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}", - "payload": "https://www.amazon.co.jp/hz/mycd/ajax", - "insights": "https://www.amazon.co.jp/kindle/reading/insights/data", - "book_url": "https://www.amazon.co.jp/dp/{book_id}", - }, - "de": { - "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll", - "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}", - "payload": "https://www.amazon.de/hz/mycd/ajax", - "insights": "https://www.amazon.de/kindle/reading/insights/data", - "book_url": "https://www.amazon.de/dp/{book_id}", - }, - "com": { - "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll", - "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}", - "payload": "https://www.amazon.com/hz/mycd/ajax", - "insights": "https://www.amazon.com/kindle/reading/insights/data", - "book_url": "https://www.amazon.com/dp/{book_id}", - }, -} - -# for kindle stats -GITHUB_README_COMMENTS = ( - "(\n)(.*)(\n)" -) -MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n" -MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n \ -- I pushed {pdocs_len} docks\n \ -- My first book is {first_book_title}, bought on {first_book_bought_date}\n \ -- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n" - -KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n" -KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n" - - -def replace_readme_comments(file_name, comment_str, comments_name): - with open(file_name, "r+", encoding="UTF-8") as f: - text = f.read() - # regrex sub from github readme comments - text = re.sub( - GITHUB_README_COMMENTS.format(name=comments_name), - r"\1{}\n\3".format(comment_str), - text, - flags=re.DOTALL, - ) - f.seek(0) - f.write(text) - f.truncate() - - -class Kindle: - def __init__( - self, - csrf_token=None, - domain="cn", - out_dir=DEFAULT_OUT_DIR, - out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR, - cut_length=100, - session_file=DEFAULT_SESSION_FILE, - ): - self.urls = KINDLE_URLS[domain] - self._csrf_token = csrf_token - self.total_to_download = 0 - self.out_dir = out_dir - self.out_dedrm_dir = out_dedrm_dir - self.dedrm = False - self.cut_length = cut_length - self.not_done = False - self.session_file = session_file - self.session = self.make_session() - self.is_browser_cookie = False - self.to_resolve_duplicate_names = False - self.books_info_dict = {} - self.file_type_list = ["EBOOK", "PDOC"] - atexit.register(self.dump_session) - - def set_cookie(self, cookiejar): - if not cookiejar: - raise Exception("Please make sure your amazon cookie is right") - self.session.cookies.clear() - self.session.cookies.update(cookiejar) - - def set_cookie_from_string(self, cookie_string): - cj = self._parse_kindle_cookie(cookie_string) - self.set_cookie(cj) - - def dump_session(self): - with open(self.session_file, "wb") as f: - pickle.dump(self.session, f) - - @property - def csrf_token(self): - if not self._csrf_token: - self._csrf_token = self._get_csrf_token() - return self._csrf_token - - @csrf_token.setter - def csrf_token(self, csrf_token): - self._csrf_token = csrf_token - - def ensure_session_cookie(self): - if not self.session.cookies: - logger.debug("No cookie found, trying to load from browsers") - try: - self.set_cookie(browser_cookie3.load(domain_name="amazon")) - except: - print("not found browser_cookie3 here, you should use --cookie command") - - @staticmethod - def _parse_kindle_cookie(kindle_cookie): - cookie = SimpleCookie() - cookie.load(kindle_cookie) - cookies_dict = {} - cookiejar = None - for key, morsel in cookie.items(): - cookies_dict[key] = morsel.value - cookiejar = requests.utils.cookiejar_from_dict( - cookies_dict, cookiejar=None, overwrite=True - ) - return cookiejar - - def _get_csrf_token(self): - """ - TODO: I do not know why I have to get csrf token in the page not in this way - maybe figure out why in the future - """ - r = self.session.get(self.urls["bookall"]) - match = re.search(r'var csrfToken = "(.*)";', r.text) - if not match: - self.revoke_cookie_token(open_page=self.is_browser_cookie) - raise Exception( - "Can't get the csrf token, " - f"please refresh the page at {self.urls['bookall']} and retry" - ) - return match.group(1) - - def refresh_browser_cookie(self): - import webbrowser - - try: - webbrowser.open(self.urls["bookall"]) - except Exception: - pass - - def revoke_cookie_token(self, open_page=False): - # help user open it directly. - logger.info( - "Opening the url to get cookie...You can wait for the page to finish loading and retry" - ) - self._csrf_token = None # reset the token - # clear the cookies so the next time it can be reloaded from the browsers - self.session.cookies.clear() - if open_page: - self.refresh_browser_cookie() - - def ensure_cookie_token(self): - if not self._csrf_token: - if not self.session.cookies: - self.refresh_browser_cookie() - self.ensure_session_cookie() - self._csrf_token = self._get_csrf_token() - logger.debug( - f"session-id: { self.session.cookies.get_dict().get('session-id') }" - ) - - def make_session(self): - if os.path.exists(self.session_file): - with open(self.session_file, "rb") as f: - session = pickle.load(f) - else: - session = requests.Session() - session.headers.update(KINDLE_HEADER) - session.mount( - # will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for - # (413, 429, 503) statuses - "https://", - HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)), - ) - - logger.debug(f"user-agent: { session.headers.get('User-Agent') }") - return session - - def get_devices(self): - """ - This method must be called before each download, so we ensure - the session cookies before it is called - """ - self.ensure_cookie_token() - - payload = {"param": {"GetDevices": {}}} - r = self.session.post( - self.urls["payload"], - data={ - "data": json.dumps(payload), - "csrfToken": self.csrf_token, - }, - ) - r.raise_for_status() - devices = r.json() - if devices.get("error"): - self.revoke_cookie_token(open_page=True) - raise Exception( - f"Error: {devices.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie" - ) - devices = r.json()["GetDevices"]["devices"] - # sleep get device first time. - logger.info("Amazon open their bot check will sleep 3s") - time.sleep(3) - if not devices: - raise Exception("No devices are bound to this account") - return [device for device in devices if "deviceSerialNumber" in device] - - def get_all_books(self, start_index=0, filetype="EBOK"): - """ - TODO: refactor this function - """ - # some info - if filetype == "PDOC": - logger.info( - "It will take some time to get all PDOC books list, please wait" - ) - startIndex = start_index - batchSize = 100 - payload = { - "param": { - "OwnershipData": { - "sortOrder": "DESCENDING", - "sortIndex": "DATE", - "startIndex": startIndex, - "batchSize": batchSize, - "contentType": CONTENT_TYPES[filetype], - "itemStatus": ["Active"], - } - } - } - - if filetype == "EBOK": - payload["param"]["OwnershipData"].update( - { - "originType": ["Purchase"], - } - ) - else: - batchSize = 18 - payload["param"]["OwnershipData"].update( - { - "batchSize": batchSize, - "isExtendedMYK": False, - } - ) - - books = [] - ### added by yihong0618 2022.06.27 - ### this ugly code is for amazon open their bot check - ### if the bot check close - ### will delete the try and try code - break_times = 0 - while True: - # anyway sleep 0.5 - time.sleep(0.5) - r = self.session.post( - self.urls["payload"], - data={"data": json.dumps(payload), "csrfToken": self.csrf_token}, - ) - # try three times for bot check - if r.status_code == 503: - # sleep and try again - sleep_seconds = 5 + 2 * break_times - time.sleep(sleep_seconds) - logger.info( - f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}" - ) - if break_times < 7: - break_times += 1 - r = self.session.post( - self.urls["payload"], - data={"data": json.dumps(payload), "csrfToken": self.csrf_token}, - ) - if not r.ok: - if r.status_code == 503: - time.sleep(sleep_seconds) - logger.info( - f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}" - ) - logger.info(f"Next time fail will break the loop") - r = self.session.post( - self.urls["payload"], - data={ - "data": json.dumps(payload), - "csrfToken": self.csrf_token, - }, - ) - break_times += 1 - if not r.ok: - # amazon limit this api - if startIndex == 0: - logger.error( - "Amazon api limit when this download done.\n Please run it again`" - ) - else: - self.not_done = True - logger.error( - "Amazon api limit when this download done.\n You can add command `--resume-from %s`", - startIndex, - ) - break - result = r.json() - if not result.get("success", True): - logger.error("get all books error: %s", result.get("error")) - break - items = result["OwnershipData"]["items"] - for item in items: - if filetype == "PDOC": - item["title"] = html.unescape(item["title"]) - item["authors"] = html.unescape(item.pop("author", "")) - if item.get("readStatus", "") == "READ": - self.books_info_dict[item["asin"]] = item - - books.extend(items) - if not self.total_to_download: - self.total_to_download = result["OwnershipData"]["numberOfItems"] - - if result["OwnershipData"]["hasMoreItems"]: - startIndex += batchSize - payload["param"]["OwnershipData"]["startIndex"] = startIndex - else: - break - return books - - def _get_reading_stats(self): - insights_url = self.urls["insights"] - r = self.session.get(insights_url) - if r.ok: - return r.json() - logger.error(f"Something is wrong get the stats data url: {insights_url}") - raise Exception(f"Something is wrong get the stats data url: {insights_url}") - - def _make_one_book_stats_info(self, book_info): - book_url = self.urls["book_url"] - asin = book_info["asin"] - book = self.books_info_dict.get(asin) - book_title = book.get("title", "") - # filter the brackets in the book title - book_title = re.sub( - r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title - ) - book_title = book_title.replace(" ", "") - if book.get("category", "") == "KindleEBook": - book_url = book_url.format(book_id=asin) - book_title = f"[{book_title}]({book_url})" - book_authors = book.get("authors") - if len(book_authors) > 10: - book_authors = ",".join(book_authors.split(",")[:2]) + "..." - # only keep date - read = book_info.get("date_read")[:10] - acquired = ( - book.get("acquiredDate", "") - .replace("年", "-") - .replace("月", "-") - .replace("日", "") - ) - return book_title, book_authors, acquired, read - - def make_kindle_stats_readme(self): - reading_stats = self._get_reading_stats() - read_list = reading_stats.get("goal_info", {}).get("titles_read") - ebooks = self.get_all_books(filetype="EBOK") - pdocs = self.get_all_books(filetype="PDOC") - first_ebook, first_pdoc = ebooks[-1], pdocs[-1] - print(len(self.books_info_dict.keys()), first_ebook, first_pdoc) - print(read_list) - - s = MY_KINDLE_STATS_INFO_HEAD - kindle_stats_str = MY_KINDLE_STATS_INFO.format( - books_len=len(ebooks), - pdocs_len=len(pdocs), - first_book_title=first_ebook["title"], - first_book_bought_date=first_ebook["acquiredDate"], - first_doc_title=first_pdoc["title"], - first_doc_push_date=first_pdoc["acquiredDate"], - ) - s += kindle_stats_str - s += KINDLE_TABLE_HEAD - index = 1 - for book_info in read_list: - book_title, book_authors, acquired, read = self._make_one_book_stats_info( - book_info - ) - s += KINDLE_STAT_TEMPLATE.format( - id=str(index), - title=book_title, - authors=book_authors, - acquired=acquired, - read=read, - ) - index += 1 - replace_readme_comments("my_kindle_stats.md", s, "my_kindle") - - def download_one_book(self, book, device, index, filetype="EBOK"): - title = book["title"] - asin = book["asin"] - try: - download_url = self.urls["download"].format( - filetype, - asin, - device["deviceSerialNumber"], - device["deviceType"], - device["customerId"], - ) - r = self.session.get(download_url, verify=False, stream=True) - r.raise_for_status() - name = re.findall( - r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"] - )[0] - name = urllib.parse.unquote(name) - _, extname = os.path.splitext(name) - name = title + extname - name = re.sub(r'[\\/:*?"<>|]', "_", name) - - ##### if you have many duplicate name books ##### - if self.to_resolve_duplicate_names: - name = f"{asin}_{name}" - if len(name) > self.cut_length: - name = name[: self.cut_length - 5] + name[-5:] - total_size = r.headers["Content-length"] - - out = os.path.join(self.out_dir, name) - out_dedrm = os.path.join(self.out_dedrm_dir, name) - logger.info( - f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes" - ) - with open(out, "wb") as f: - for chunk in r.iter_content(chunk_size=512): - f.write(chunk) - logger.info(f"{name} downloaded") - # for dedrm - if self.dedrm: - try: - mb = MobiBook(out) - md1, md2 = mb.get_pid_meta_info() - totalpids = get_pid_list(md1, md2, [self.device_serial_number], []) - totalpids = list(set(totalpids)) - mb.make_drm_file(totalpids, out_dedrm) - except Exception as e: - logger.error("Dedrm failed for %s: %s", name, e) - pass - except Exception as e: - logger.error(str(e)) - logger.error(f"Title: {title}, Asin: {asin} download failed") - - def download_books(self, start_index=0, filetype="EBOK"): - # use default device - device = self.get_devices()[0] - self.device_serial_number = device["deviceSerialNumber"] - - logger.info( - f"Using default device serial Number: {device['deviceSerialNumber']}" - ) - books = self.get_all_books(filetype=filetype, start_index=start_index) - if start_index > 0: - print(f"resuming the download {start_index + 1}/{self.total_to_download}") - index = start_index - for book in books: - self.download_one_book(book, device, index, filetype) - index += 1 - if self.not_done: - logger.error( - f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time" - ) - else: - if not self.dedrm: - logger.info( - "\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools " - "(https://github.com/apprenticeharper/DeDRM_tools)\n" - "with the following serial number to remove DRM: " - + device["deviceSerialNumber"] - ) - else: - logger.info( - "All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`" - ) - with open(os.path.join(self.out_dir, "key.txt"), "w") as f: - f.write(f"Key is: {device['deviceSerialNumber']}") +from kindle_download_helper import main if __name__ == "__main__": - - logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO")) - - logger.addHandler(logging.StreamHandler()) - parser = argparse.ArgumentParser() - parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?") - - cookie_group = parser.add_mutually_exclusive_group() - cookie_group.add_argument( - "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie" - ) - cookie_group.add_argument( - "--cookie-file", dest="cookie_file", default="", help="load cookie local file" - ) - - parser.add_argument( - "--cn", - dest="domain", - action="store_const", - const="cn", - default="com", - help="if your account is an amazon.cn account", - ) - parser.add_argument( - "--jp", - dest="domain", - action="store_const", - const="jp", - default="com", - help="if your account is an amazon.jp account", - ) - parser.add_argument( - "--de", - dest="domain", - action="store_const", - const="de", - default="com", - help="if your account is an amazon.de account", - ) - parser.add_argument( - "--resume-from", - dest="index", - type=int, - default=1, - help="resume from the index if download failed", - ) - parser.add_argument( - "--cut-length", - dest="cut_length", - type=int, - default=100, - help="truncate the file name", - ) - parser.add_argument( - "-o", "--outdir", default=DEFAULT_OUT_DIR, help="dwonload output dir" - ) - parser.add_argument( - "-od", - "--outdedrmdir", - default=DEFAULT_OUT_DEDRM_DIR, - help="dwonload output dedrm dir", - ) - parser.add_argument( - "-s", - "--session-file", - default=DEFAULT_SESSION_FILE, - help="The reusable session dump file", - ) - parser.add_argument( - "--pdoc", - dest="filetype", - action="store_const", - const="PDOC", - default="EBOK", - help="to download personal documents or ebook", - ) - parser.add_argument( - "--resolve_duplicate_names", - dest="resolve_duplicate_names", - action="store_true", - help="Resolve duplicate names files to download", - ) - parser.add_argument( - "--readme", - dest="readme", - action="store_true", - help="If you want to generate kindle readme stats", - ) - parser.add_argument( - "--dedrm", - dest="dedrm", - action="store_true", - help="If you want to `dedrm` directly", - ) - - parser.add_argument( - "--list", - dest="list_only", - action="store_true", - help="just list books/pdoc, not to download", - ) - - options = parser.parse_args() - - if not os.path.exists(options.outdir): - os.makedirs(options.outdir) - # for dedrm - if not os.path.exists(options.outdedrmdir): - os.makedirs(options.outdedrmdir) - kindle = Kindle( - options.csrf_token, - options.domain, - options.outdir, - options.outdedrmdir, - options.cut_length, - session_file=options.session_file, - ) - # other args - kindle.to_resolve_duplicate_names = options.resolve_duplicate_names - kindle.dedrm = options.dedrm - - if options.cookie_file: - with open(options.cookie_file, "r") as f: - kindle.set_cookie_from_string(f.read()) - elif options.cookie: - kindle.set_cookie_from_string(options.cookie) - else: - kindle.is_browser_cookie = True - - if options.list_only: - kindle.get_devices() - print( - json.dumps( - kindle.get_all_books(filetype=options.filetype), - indent=4, - ensure_ascii=False, - ) - ) - exit() - - if options.readme: - # generate readme stats - kindle.make_kindle_stats_readme() - else: - kindle.download_books(start_index=options.index - 1, filetype=options.filetype) + main() diff --git a/kindle.ui b/kindle.ui index a172b4f..6fb869b 100644 --- a/kindle.ui +++ b/kindle.ui @@ -340,7 +340,7 @@ hr { height: 1px; border-width: 0; } - License: MIT + License: GPL V3 diff --git a/kindle_download_helper.py b/kindle_download_helper.py index 43e9294..bd5cc3f 100644 --- a/kindle_download_helper.py +++ b/kindle_download_helper.py @@ -7,7 +7,7 @@ from typing import NamedTuple from PySide6 import QtCore, QtGui, QtWidgets -import kindle +from kindle_download_helper import kindle as kindle from ui_kindle import Ui_MainDialog logger = logging.getLogger("kindle") diff --git a/kindle_download_helper/__init__.py b/kindle_download_helper/__init__.py new file mode 100644 index 0000000..be8959c --- /dev/null +++ b/kindle_download_helper/__init__.py @@ -0,0 +1,2 @@ +from kindle_download_helper.cli import main +from kindle_download_helper import kindle diff --git a/kindle_download_helper/__main__.py b/kindle_download_helper/__main__.py new file mode 100644 index 0000000..6fa7a56 --- /dev/null +++ b/kindle_download_helper/__main__.py @@ -0,0 +1 @@ +from cli import main diff --git a/kindle_download_helper/cli.py b/kindle_download_helper/cli.py new file mode 100644 index 0000000..91ecb14 --- /dev/null +++ b/kindle_download_helper/cli.py @@ -0,0 +1,170 @@ +from kindle_download_helper.kindle import Kindle +import argparse +import os +import urllib3 +import logging +import json + +from kindle_download_helper.config import ( + DEFAULT_OUT_DIR, + DEFAULT_SESSION_FILE, + DEFAULT_OUT_DEDRM_DIR, +) + +logger = logging.getLogger("kindle") +fh = logging.FileHandler(".error_books.log") +fh.setLevel(logging.ERROR) +logger.addHandler(fh) + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +def main(): + logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO")) + + logger.addHandler(logging.StreamHandler()) + parser = argparse.ArgumentParser() + parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?") + + cookie_group = parser.add_mutually_exclusive_group() + cookie_group.add_argument( + "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie" + ) + cookie_group.add_argument( + "--cookie-file", dest="cookie_file", default="", help="load cookie local file" + ) + + parser.add_argument( + "--cn", + dest="domain", + action="store_const", + const="cn", + default="com", + help="if your account is an amazon.cn account", + ) + parser.add_argument( + "--jp", + dest="domain", + action="store_const", + const="jp", + default="com", + help="if your account is an amazon.jp account", + ) + parser.add_argument( + "--de", + dest="domain", + action="store_const", + const="de", + default="com", + help="if your account is an amazon.de account", + ) + parser.add_argument( + "--resume-from", + dest="index", + type=int, + default=1, + help="resume from the index if download failed", + ) + parser.add_argument( + "--cut-length", + dest="cut_length", + type=int, + default=100, + help="truncate the file name", + ) + parser.add_argument( + "-o", "--outdir", default=DEFAULT_OUT_DIR, help="dwonload output dir" + ) + parser.add_argument( + "-od", + "--outdedrmdir", + default=DEFAULT_OUT_DEDRM_DIR, + help="dwonload output dedrm dir", + ) + parser.add_argument( + "-s", + "--session-file", + default=DEFAULT_SESSION_FILE, + help="The reusable session dump file", + ) + parser.add_argument( + "--pdoc", + dest="filetype", + action="store_const", + const="PDOC", + default="EBOK", + help="to download personal documents or ebook", + ) + parser.add_argument( + "--resolve_duplicate_names", + dest="resolve_duplicate_names", + action="store_true", + help="Resolve duplicate names files to download", + ) + parser.add_argument( + "--readme", + dest="readme", + action="store_true", + help="If you want to generate kindle readme stats", + ) + parser.add_argument( + "--dedrm", + dest="dedrm", + action="store_true", + help="If you want to `dedrm` directly", + ) + + parser.add_argument( + "--list", + dest="list_only", + action="store_true", + help="just list books/pdoc, not to download", + ) + + options = parser.parse_args() + + if not os.path.exists(options.outdir): + os.makedirs(options.outdir) + # for dedrm + if not os.path.exists(options.outdedrmdir): + os.makedirs(options.outdedrmdir) + kindle = Kindle( + options.csrf_token, + options.domain, + options.outdir, + options.outdedrmdir, + options.cut_length, + session_file=options.session_file, + ) + # other args + kindle.to_resolve_duplicate_names = options.resolve_duplicate_names + kindle.dedrm = options.dedrm + + if options.cookie_file: + with open(options.cookie_file, "r") as f: + kindle.set_cookie_from_string(f.read()) + elif options.cookie: + kindle.set_cookie_from_string(options.cookie) + else: + kindle.is_browser_cookie = True + + if options.list_only: + kindle.get_devices() + print( + json.dumps( + kindle.get_all_books(filetype=options.filetype), + indent=4, + ensure_ascii=False, + ) + ) + exit() + + if options.readme: + # generate readme stats + kindle.make_kindle_stats_readme() + else: + kindle.download_books(start_index=options.index - 1, filetype=options.filetype) + + +if __name__ == "__main__": + main() diff --git a/kindle_download_helper/config.py b/kindle_download_helper/config.py new file mode 100644 index 0000000..1e940bd --- /dev/null +++ b/kindle_download_helper/config.py @@ -0,0 +1,56 @@ +from faker import Faker + +DEFAULT_OUT_DIR = "DOWNLOADS" +DEFAULT_OUT_DEDRM_DIR = "DEDRMS" +DEFAULT_SESSION_FILE = ".kindle_session" + + +KINDLE_HEADER = { + "User-Agent": Faker().user_agent(), +} + +CONTENT_TYPES = { + "EBOK": "Ebook", + "PDOC": "KindlePDoc", +} + +KINDLE_URLS = { + "cn": { + "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll", + "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN", + "payload": "https://www.amazon.cn/hz/mycd/ajax", + "insights": "https://www.amazon.cn/kindle/reading/insights/data", + "book_url": "https://www.amazon.cn/dp/{book_id}", + }, + "jp": { + "bookall": "https://www.amazon.jp/hz/mycd/myx#/home/content/booksAll", + "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}", + "payload": "https://www.amazon.co.jp/hz/mycd/ajax", + "insights": "https://www.amazon.co.jp/kindle/reading/insights/data", + "book_url": "https://www.amazon.co.jp/dp/{book_id}", + }, + "de": { + "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll", + "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}", + "payload": "https://www.amazon.de/hz/mycd/ajax", + "insights": "https://www.amazon.de/kindle/reading/insights/data", + "book_url": "https://www.amazon.de/dp/{book_id}", + }, + "com": { + "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll", + "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}", + "payload": "https://www.amazon.com/hz/mycd/ajax", + "insights": "https://www.amazon.com/kindle/reading/insights/data", + "book_url": "https://www.amazon.com/dp/{book_id}", + }, +} + +# for kindle stats +GITHUB_README_COMMENTS = ( + "(\n)(.*)(\n)" +) +MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n" +MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n- I pushed {pdocs_len} docks\n- My first book is {first_book_title}, bought on {first_book_bought_date}\n- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n" + +KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n" +KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n" diff --git a/dedrm/__init__.py b/kindle_download_helper/dedrm/__init__.py similarity index 100% rename from dedrm/__init__.py rename to kindle_download_helper/dedrm/__init__.py diff --git a/dedrm/k4mobidedrm.py b/kindle_download_helper/dedrm/k4mobidedrm.py similarity index 100% rename from dedrm/k4mobidedrm.py rename to kindle_download_helper/dedrm/k4mobidedrm.py diff --git a/dedrm/kgenpids.py b/kindle_download_helper/dedrm/kgenpids.py similarity index 100% rename from dedrm/kgenpids.py rename to kindle_download_helper/dedrm/kgenpids.py diff --git a/dedrm/mobidedrm.py b/kindle_download_helper/dedrm/mobidedrm.py similarity index 100% rename from dedrm/mobidedrm.py rename to kindle_download_helper/dedrm/mobidedrm.py diff --git a/kindle_download_helper/kindle.py b/kindle_download_helper/kindle.py new file mode 100644 index 0000000..6437b16 --- /dev/null +++ b/kindle_download_helper/kindle.py @@ -0,0 +1,499 @@ +""" +Note some download code from: https://github.com/sghctoma/bOOkp +Great Thanks +""" + +import atexit +import html +import json +import logging +import os +import pickle +import re +import time +import urllib +from http.cookies import SimpleCookie + +import requests +import urllib3 +from requests.adapters import HTTPAdapter + +from kindle_download_helper.dedrm import MobiBook, get_pid_list +from kindle_download_helper.config import ( + KINDLE_URLS, + DEFAULT_OUT_DIR, + DEFAULT_SESSION_FILE, + DEFAULT_OUT_DEDRM_DIR, + CONTENT_TYPES, + KINDLE_STAT_TEMPLATE, +) +from kindle_download_helper.config import ( + MY_KINDLE_STATS_INFO_HEAD, + KINDLE_HEADER, + MY_KINDLE_STATS_INFO, + KINDLE_TABLE_HEAD, +) +from kindle_download_helper.utils import replace_readme_comments + +try: + import browser_cookie3 +except ModuleNotFoundError: + print("not found browser_cookie3 here, you should use --cookie command") + +logger = logging.getLogger("kindle") +fh = logging.FileHandler(".error_books.log") +fh.setLevel(logging.ERROR) +logger.addHandler(fh) + +urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + + +class Kindle: + def __init__( + self, + csrf_token=None, + domain="cn", + out_dir=DEFAULT_OUT_DIR, + out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR, + cut_length=100, + session_file=DEFAULT_SESSION_FILE, + ): + self.urls = KINDLE_URLS[domain] + self._csrf_token = csrf_token + self.total_to_download = 0 + self.out_dir = out_dir + self.out_dedrm_dir = out_dedrm_dir + self.dedrm = False + self.cut_length = cut_length + self.not_done = False + self.session_file = session_file + self.session = self.make_session() + self.is_browser_cookie = False + self.to_resolve_duplicate_names = False + self.books_info_dict = {} + self.file_type_list = ["EBOOK", "PDOC"] + atexit.register(self.dump_session) + + def set_cookie(self, cookiejar): + if not cookiejar: + raise Exception("Please make sure your amazon cookie is right") + self.session.cookies.clear() + self.session.cookies.update(cookiejar) + + def set_cookie_from_string(self, cookie_string): + cj = self._parse_kindle_cookie(cookie_string) + self.set_cookie(cj) + + def dump_session(self): + with open(self.session_file, "wb") as f: + pickle.dump(self.session, f) + + @property + def csrf_token(self): + if not self._csrf_token: + self._csrf_token = self._get_csrf_token() + return self._csrf_token + + @csrf_token.setter + def csrf_token(self, csrf_token): + self._csrf_token = csrf_token + + def ensure_session_cookie(self): + if not self.session.cookies: + logger.debug("No cookie found, trying to load from browsers") + try: + self.set_cookie(browser_cookie3.load(domain_name="amazon")) + except: + print("not found browser_cookie3 here, you should use --cookie command") + + @staticmethod + def _parse_kindle_cookie(kindle_cookie): + cookie = SimpleCookie() + cookie.load(kindle_cookie) + cookies_dict = {} + cookiejar = None + for key, morsel in cookie.items(): + cookies_dict[key] = morsel.value + cookiejar = requests.utils.cookiejar_from_dict( + cookies_dict, cookiejar=None, overwrite=True + ) + return cookiejar + + def _get_csrf_token(self): + """ + TODO: I do not know why I have to get csrf token in the page not in this way + maybe figure out why in the future + """ + r = self.session.get(self.urls["bookall"]) + match = re.search(r'var csrfToken = "(.*)";', r.text) + if not match: + self.revoke_cookie_token(open_page=self.is_browser_cookie) + raise Exception( + "Can't get the csrf token, " + f"please refresh the page at {self.urls['bookall']} and retry" + ) + return match.group(1) + + def refresh_browser_cookie(self): + import webbrowser + + try: + webbrowser.open(self.urls["bookall"]) + except Exception: + pass + + def revoke_cookie_token(self, open_page=False): + # help user open it directly. + logger.info( + "Opening the url to get cookie...You can wait for the page to finish loading and retry" + ) + self._csrf_token = None # reset the token + # clear the cookies so the next time it can be reloaded from the browsers + self.session.cookies.clear() + if open_page: + self.refresh_browser_cookie() + + def ensure_cookie_token(self): + if not self._csrf_token: + if not self.session.cookies: + self.refresh_browser_cookie() + self.ensure_session_cookie() + self._csrf_token = self._get_csrf_token() + logger.debug( + f"session-id: { self.session.cookies.get_dict().get('session-id') }" + ) + + def make_session(self): + if os.path.exists(self.session_file): + with open(self.session_file, "rb") as f: + session = pickle.load(f) + else: + session = requests.Session() + session.headers.update(KINDLE_HEADER) + session.mount( + # will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for + # (413, 429, 503) statuses + "https://", + HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)), + ) + + logger.debug(f"user-agent: { session.headers.get('User-Agent') }") + return session + + def get_devices(self): + """ + This method must be called before each download, so we ensure + the session cookies before it is called + """ + self.ensure_cookie_token() + + payload = {"param": {"GetDevices": {}}} + r = self.session.post( + self.urls["payload"], + data={ + "data": json.dumps(payload), + "csrfToken": self.csrf_token, + }, + ) + r.raise_for_status() + devices = r.json() + if devices.get("error"): + self.revoke_cookie_token(open_page=True) + raise Exception( + f"Error: {devices.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie" + ) + devices = r.json()["GetDevices"]["devices"] + # sleep get device first time. + logger.info("Amazon open their bot check will sleep 3s") + time.sleep(3) + if not devices: + raise Exception("No devices are bound to this account") + return [device for device in devices if "deviceSerialNumber" in device] + + def get_all_books(self, start_index=0, filetype="EBOK"): + """ + TODO: refactor this function + """ + # some info + if filetype == "PDOC": + logger.info( + "It will take some time to get all PDOC books list, please wait" + ) + startIndex = start_index + batchSize = 100 + payload = { + "param": { + "OwnershipData": { + "sortOrder": "DESCENDING", + "sortIndex": "DATE", + "startIndex": startIndex, + "batchSize": batchSize, + "contentType": CONTENT_TYPES[filetype], + "itemStatus": ["Active"], + } + } + } + + if filetype == "EBOK": + payload["param"]["OwnershipData"].update( + { + "originType": ["Purchase"], + } + ) + else: + batchSize = 18 + payload["param"]["OwnershipData"].update( + { + "batchSize": batchSize, + "isExtendedMYK": False, + } + ) + + books = [] + ### added by yihong0618 2022.06.27 + ### this ugly code is for amazon open their bot check + ### if the bot check close + ### will delete the try and try code + break_times = 0 + while True: + # anyway sleep 0.5 + time.sleep(0.5) + r = self.session.post( + self.urls["payload"], + data={"data": json.dumps(payload), "csrfToken": self.csrf_token}, + ) + # try three times for bot check + if r.status_code == 503: + # sleep and try again + sleep_seconds = 5 + 2 * break_times + time.sleep(sleep_seconds) + logger.info( + f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}" + ) + if break_times < 7: + break_times += 1 + r = self.session.post( + self.urls["payload"], + data={"data": json.dumps(payload), "csrfToken": self.csrf_token}, + ) + if not r.ok: + if r.status_code == 503: + time.sleep(sleep_seconds) + logger.info( + f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}" + ) + logger.info(f"Next time fail will break the loop") + r = self.session.post( + self.urls["payload"], + data={ + "data": json.dumps(payload), + "csrfToken": self.csrf_token, + }, + ) + break_times += 1 + if not r.ok: + # amazon limit this api + if startIndex == 0: + logger.error( + "Amazon api limit when this download done.\n Please run it again`" + ) + else: + self.not_done = True + logger.error( + "Amazon api limit when this download done.\n You can add command `--resume-from %s`", + startIndex, + ) + break + result = r.json() + if not result.get("success", True): + logger.error("get all books error: %s", result.get("error")) + break + items = result["OwnershipData"]["items"] + for item in items: + if filetype == "PDOC": + item["title"] = html.unescape(item["title"]) + item["authors"] = html.unescape(item.pop("author", "")) + if item.get("readStatus", "") == "READ": + self.books_info_dict[item["asin"]] = item + + books.extend(items) + self.total_to_download = result["OwnershipData"]["numberOfItems"] + + if result["OwnershipData"]["hasMoreItems"]: + startIndex += batchSize + payload["param"]["OwnershipData"]["startIndex"] = startIndex + else: + break + return books + + def _get_reading_stats(self): + insights_url = self.urls["insights"] + r = self.session.get(insights_url) + if r.ok: + return r.json() + logger.error(f"Something is wrong get the stats data url: {insights_url}") + raise Exception(f"Something is wrong get the stats data url: {insights_url}") + + def _make_one_book_stats_info(self, book_info): + book_url = self.urls["book_url"] + asin = book_info["asin"] + book = self.books_info_dict.get(asin) + if not book: + return + book_title = book.get("title", "") + # filter the brackets in the book title + book_title = re.sub( + r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title + ) + book_title = book_title.replace(" ", "") + if book.get("category", "") == "KindleEBook": + book_url = book_url.format(book_id=asin) + book_title = f"[{book_title}]({book_url})" + book_authors = book.get("authors") + if len(book_authors) > 10: + book_authors = ",".join(book_authors.split(",")[:2]) + "..." + # only keep date + read = book_info.get("date_read")[:10] + acquired = ( + book.get("acquiredDate", "") + .replace("年", "-") + .replace("月", "-") + .replace("日", "") + ) + return book_title, book_authors, acquired, read + + def make_kindle_stats_readme(self): + ebooks = self.get_all_books(filetype="EBOK") + pdocs = self.get_all_books(filetype="PDOC") + first_ebook, first_pdoc = None, None + reading_stats = self._get_reading_stats() + read_list = reading_stats.get("goal_info", {}).get("titles_read") + if pdocs: + first_pdoc = pdocs[-1] + if first_ebook: + first_ebook = ebooks[-1] + + s = MY_KINDLE_STATS_INFO_HEAD + kindle_stats_str = "" + if pdocs or ebooks: + kindle_stats_str = MY_KINDLE_STATS_INFO.format( + books_len=len(ebooks) if ebooks else 0, + pdocs_len=len(pdocs) if pdocs else 0, + first_book_title=first_ebook["title"] if first_ebook else "", + first_book_bought_date=first_ebook["acquiredDate"] + if first_ebook + else "", + first_doc_title=first_pdoc["title"] if first_pdoc else "", + first_doc_push_date=first_pdoc["acquiredDate"] if first_pdoc else "", + ) + s += kindle_stats_str + s += KINDLE_TABLE_HEAD + index = 1 + for book_info in read_list: + if not self._make_one_book_stats_info(book_info): + continue + book_title, book_authors, acquired, read = self._make_one_book_stats_info( + book_info + ) + s += KINDLE_STAT_TEMPLATE.format( + id=str(index), + title=book_title, + authors=book_authors, + acquired=acquired, + read=read, + ) + index += 1 + if not os.path.exists("my_kindle_stats.md"): + with open("my_kindle_stats.md", "a") as f: + f.write( + """ + + """ + ) + replace_readme_comments("my_kindle_stats.md", s, "my_kindle") + + def download_one_book(self, book, device, index, filetype="EBOK"): + title = book["title"] + asin = book["asin"] + try: + download_url = self.urls["download"].format( + filetype, + asin, + device["deviceSerialNumber"], + device["deviceType"], + device["customerId"], + ) + r = self.session.get(download_url, verify=False, stream=True) + r.raise_for_status() + name = re.findall( + r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"] + )[0] + name = urllib.parse.unquote(name) + _, extname = os.path.splitext(name) + name = title + extname + name = re.sub(r'[\\/:*?"<>|]', "_", name) + + ##### if you have many duplicate name books ##### + if self.to_resolve_duplicate_names: + name = f"{asin}_{name}" + if len(name) > self.cut_length: + name = name[: self.cut_length - 5] + name[-5:] + total_size = r.headers["Content-length"] + + out = os.path.join(self.out_dir, name) + out_dedrm = os.path.join(self.out_dedrm_dir, name) + logger.info( + f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes" + ) + with open(out, "wb") as f: + for chunk in r.iter_content(chunk_size=512): + f.write(chunk) + logger.info(f"{name} downloaded") + # for dedrm + if self.dedrm: + try: + mb = MobiBook(out) + md1, md2 = mb.get_pid_meta_info() + totalpids = get_pid_list(md1, md2, [self.device_serial_number], []) + totalpids = list(set(totalpids)) + mb.make_drm_file(totalpids, out_dedrm) + except Exception as e: + logger.error("Dedrm failed for %s: %s", name, e) + pass + except Exception as e: + logger.error(str(e)) + logger.error(f"Title: {title}, Asin: {asin} download failed") + + def download_books(self, start_index=0, filetype="EBOK"): + # use default device + device = self.get_devices()[0] + self.device_serial_number = device["deviceSerialNumber"] + + logger.info( + f"Using default device serial Number: {device['deviceSerialNumber']}" + ) + books = self.get_all_books(filetype=filetype, start_index=start_index) + if start_index > 0: + print(f"resuming the download {start_index + 1}/{self.total_to_download}") + index = start_index + for book in books: + self.download_one_book(book, device, index, filetype) + index += 1 + if self.not_done: + logger.error( + f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time" + ) + else: + if not self.dedrm: + logger.info( + "\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools " + "(https://github.com/apprenticeharper/DeDRM_tools)\n" + "with the following serial number to remove DRM: " + + device["deviceSerialNumber"] + ) + else: + logger.info( + "All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`" + ) + with open(os.path.join(self.out_dir, "key.txt"), "w") as f: + f.write(f"Key is: {device['deviceSerialNumber']}") diff --git a/kindle_download_helper/utils.py b/kindle_download_helper/utils.py new file mode 100644 index 0000000..7ef798d --- /dev/null +++ b/kindle_download_helper/utils.py @@ -0,0 +1,17 @@ +import re +from kindle_download_helper.config import GITHUB_README_COMMENTS + + +def replace_readme_comments(file_name, comment_str, comments_name): + with open(file_name, "r+", encoding="UTF-8") as f: + text = f.read() + # regrex sub from github readme comments + text = re.sub( + GITHUB_README_COMMENTS.format(name=comments_name), + r"\1{}\n\3".format(comment_str), + text, + flags=re.DOTALL, + ) + f.seek(0) + f.write(text) + f.truncate() diff --git a/my_kindle_stats.md b/my_kindle_stats.md index 4ceb268..3a1da90 100644 --- a/my_kindle_stats.md +++ b/my_kindle_stats.md @@ -1,56 +1,3 @@ - -## My kindle stats -- I bought 38 books - - I pushed 871 docks - - My first book is 知乎周刊·商业的细节, bought on 2013年9月16日 - - My first doc is 天涯头条:《刘军宁:文明社会与言论自由,事关社会安定和对权力的制约》, bought on 2015年3月16日 - -| ID | Title | Authors | Acquired | Read | - | ---- | ---- | ---- | ---- | ---- | -| 1 | 其主之声 | 斯坦尼斯瓦夫·莱姆 | 2022-5-3 | 2022-05-03 | -| 2 | 奇鸟行状录 | 村上春树 | 2022-3-22 | 2022-03-22 | -| 3 | Origin原型機-第03卷 | Boichi | 2021-6-1 | 2022-02-11 | -| 4 | Origin原型機-第05卷 | Boichi | 2021-6-1 | 2022-02-11 | -| 5 | Origin原型機-第07卷 | Boichi | 2021-6-1 | 2022-02-11 | -| 6 | 第一人称单数 | 村上春树 | 2021-11-28 | 2022-02-05 | -| 7 | 挽救计划 | 安迪·威尔 | 2021-11-18 | 2021-11-18 | -| 8 | [日本名家小说集](https://www.amazon.cn/dp/B08P8KXYZ1) | 东野圭吾, 伊坂幸太郎... | 2021-5-1 | 2021-11-17 | -| 9 | 两京十五日 | 马伯庸 | 2021-10-3 | 2021-10-05 | -| 10 | 绝叫 | 叶真中显 | 2021-8-17 | 2021-08-22 | -| 11 | 炎拳-第02卷 | 105965398155@vol.moe... | 2020-11-8 | 2021-07-24 | -| 12 | Origin原型機-第02卷 | Boichi | 2021-6-1 | 2021-06-04 | -| 13 | Origin原型機-第01卷 | Boichi | 2021-6-1 | 2021-06-03 | -| 14 | 殺手寓言-第15卷 | 南勝久 | 2021-4-9 | 2021-04-10 | -| 15 | 殺手寓言-第14卷 | 南勝久 | 2021-4-6 | 2021-04-09 | -| 16 | 殺手寓言-第13卷 | 南勝久 | 2021-4-6 | 2021-04-09 | -| 17 | 殺手寓言-第12卷 | 南勝久 | 2021-4-6 | 2021-04-08 | -| 18 | 殺手寓言-第11卷 | 南勝久 | 2021-4-6 | 2021-04-08 | -| 19 | 殺手寓言-第10卷 | 南勝久 | 2021-4-6 | 2021-04-08 | -| 20 | 殺手寓言-第09卷 | 南勝久 | 2021-4-6 | 2021-04-07 | -| 21 | 殺手寓言-第08卷 | 南勝久 | 2021-4-6 | 2021-04-07 | -| 22 | 殺手寓言-第07卷 | 南勝久 | 2021-4-4 | 2021-04-05 | -| 23 | 殺手寓言-第06卷 | 南勝久 | 2021-4-4 | 2021-04-05 | -| 24 | 殺手寓言-第05卷 | 南勝久 | 2021-4-4 | 2021-04-05 | -| 25 | 殺手寓言-第04卷 | 南勝久 | 2021-4-4 | 2021-04-04 | -| 26 | 殺手寓言-第03卷 | 南勝久 | 2021-3-31 | 2021-04-03 | -| 27 | 殺手寓言-第02卷 | 南勝久 | 2021-3-31 | 2021-04-02 | -| 28 | 殺手寓言-第01卷 | 南勝久 | 2021-3-31 | 2021-04-01 | -| 29 | 夏日重現-第12卷 | 田中靖規 | 2021-2-19 | 2021-03-06 | -| 30 | 夏日重現-第11卷 | 田中靖規 | 2021-2-19 | 2021-03-05 | -| 31 | 夏日重現-第10卷 | 田中靖規 | 2021-2-19 | 2021-03-04 | -| 32 | 夏日重現-第09卷 | 田中靖規 | 2021-2-19 | 2021-03-04 | -| 33 | 夏日重現-第08卷 | 田中靖規 | 2021-2-19 | 2021-02-28 | -| 34 | 夏日重現-第07卷 | 田中靖規 | 2021-2-19 | 2021-02-28 | -| 35 | 夏日重現-第06卷 | 田中靖規 | 2021-2-19 | 2021-02-26 | -| 36 | 夏日重現-第05卷 | 田中靖規 | 2021-2-19 | 2021-02-23 | -| 37 | 夏日重現-第04卷 | 田中靖規 | 2021-2-19 | 2021-02-21 | -| 38 | 夏日重現-第03卷 | 田中靖規 | 2021-2-19 | 2021-02-21 | -| 39 | 夏日重現-第02卷 | 田中靖規 | 2021-2-19 | 2021-02-19 | -| 40 | 夏日重現-第01卷 | 田中靖規 | 2021-2-19 | 2021-02-19 | -| 41 | 1984 | George Orwell... | 2015-7-1 | 2021-02-18 | -| 42 | 炎拳-第01卷 | 105965398155@vol.moe... | 2020-11-8 | 2020-11-10 | -| 43 | 1984 | George Orwell... | 2015-7-1 | 2020-04-25 | -| 44 | 锦衣之下 | 蓝色狮 | 2020-1-7 | 2020-01-27 | - + \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..1a4cfd8 --- /dev/null +++ b/setup.py @@ -0,0 +1,23 @@ +from setuptools import find_packages, setup + +setup( + name="kindle_download", + author="yihong0618", + author_email="zouzou0208@gmail.com", + url="https://github.com/yihong0618/kindle_download_helper", + license="GPL V3", + version="1.1.1", + description="Download all your kindle books and `DeDRM` script.", + long_description="Download all your kindle books and `DeDRM` script.", + packages=find_packages(), + include_package_data=True, + install_requires=[ + "requests", + "browser-cookie3", + "faker", + "pywin32 ; sys_platform == 'win32'" + ], + entry_points={ + "console_scripts": ["kindle_download = kindle_download_helper.cli:main"], + }, +) diff --git a/ui_kindle.py b/ui_kindle.py index a625741..d558270 100644 --- a/ui_kindle.py +++ b/ui_kindle.py @@ -300,6 +300,6 @@ class Ui_MainDialog(object): self.label_6.setText(QCoreApplication.translate("MainDialog", u"\u9690\u79c1\u58f0\u660e\uff1a\u6211\u4eec\u4e0d\u4f1a\u6536\u96c6\u4efb\u4f55\u7528\u6237\u4fe1\u606f\uff0c\u8bf7\u653e\u5fc3\u4f7f\u7528", None)) self.label_3.setText(QCoreApplication.translate("MainDialog", u"Copyright 2022 \u00a9 [yihong0618](https://github.com/yihong0618) and [frostming](https://github.com/frostming)", None)) self.label_4.setText(QCoreApplication.translate("MainDialog", u"GitHub: ", None)) - self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: MIT", None)) + self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: GPL V3", None)) # retranslateUi