diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 8705997..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-MIT License
-
-Copyright (c) 2022 yihong, frostming and contributors
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
diff --git a/README.md b/README.md
index 7fac828..61da482 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# Kindle_download_helper
-Download all your kindle books script.
+A script to download all your Kindle books and remove their DRM (`DeDRM`).
## 安装 Kindle_download_helper
@@ -17,11 +17,15 @@ Download all your kindle books script.
- Mac 新手指南 by @chongiscool,见 [#76](https://github.com/yihong0618/Kindle_download_helper/issues/76)
+
### Cli 安装使用
1. python3
2. requirements
+Or just install it with pip:
+`pip3 install kindle_download`
+
```python
python3 --version #查看 python 版本
```
@@ -36,6 +40,7 @@ pip3 install -r requirements.txt
```python
python3 kindle.py --h #查看使用参数
+kindle_download --h # pip
usage: kindle.py [-h] [--cookie COOKIE | --cookie-file COOKIE_FILE] [--cn] [--jp] [--de] [--resume-from INDEX]
[--cut-length CUT_LENGTH] [-o OUTDIR] [-od OUTDEDRMDIR] [-s SESSION_FILE] [--pdoc] [--resolve_duplicate_names]
@@ -66,7 +71,7 @@ options:
--resolve_duplicate_names
Resolve duplicate names files to download
--readme If you want to generate kindle readme stats
- --dedrm If you want to `dedrm` directly
+ --dedrm If you want to `DeDRM` directly
--list just list books/pdoc, not to download
```
@@ -77,6 +82,8 @@ options:
```python
python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
+# or, if installed with pip:
+kindle_download --dedrm --cn
```
(推荐) 手动输入 cookie、csrfToken 进行下载
@@ -84,6 +91,9 @@ python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
```python
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
+# or, if installed with pip:
+kindle_download ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
+kindle_download ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
```
### 获取 cookie
@@ -154,7 +164,7 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken}
- cookie 和 csrf token 会过期,重新刷新下 amazon 的页面就行
- 程序会自动在命令执行的目录下创建 `DOWNLOADS` 目录,书会下载在 `DOWNLOADS` 里
-- 支持 mobi 类型的文件直接 dedrm `--dedrm` 生成的文件在 `DEDRMS` 里
+- 支持 mobi 类型的文件直接 DeDRM `--dedrm` 生成的文件在 `DEDRMS` 里
- 如果你用 [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) 解密 key 存在 key.txt 里
- 或者直接拖进 Calibre 里 please google it.
- 如果过程中失败了可以使用 e.g. `--resume-from ${num}`
@@ -169,7 +179,8 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken}
- The cookie and csrf token will expire, just refresh the amazon page again.
- The program will automatically create `DOWNLOADS` directory under the command execution directory, the book will be downloaded in `DOWNLOADS` directory.
-- If you use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to decrypt the key, it will be stored in key.txt
+- Supports removing DRM directly with `--dedrm`; the decrypted files are saved in `DEDRMS`
+- Or use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) yourself: the key it needs is saved in `key.txt`
- or just drag it into Calibre. Please google it.
- If the process fails you can use e.g. `--resume-from ${num}`
- If the name is too long, you can add: `-cut-length 80` to truncate the file name
diff --git a/icon_rc.py b/icon_rc.py
index dba7aa8..79b9cb5 100644
--- a/icon_rc.py
+++ b/icon_rc.py
@@ -1816,7 +1816,7 @@ qt_resource_struct = b"\
\x00\x00\x00\x0e\x00\x02\x00\x00\x00\x01\x00\x00\x00\x03\
\x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00$\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
-\x00\x00\x01\x82\x0f!\xe2\xc8\
+\x00\x00\x01\x82X\xe5\xc8\x87\
"
def qInitResources():
diff --git a/kindle.py b/kindle.py
index f31a796..e96ee43 100644
--- a/kindle.py
+++ b/kindle.py
@@ -1,691 +1,5 @@
-"""
-Note some download code from: https://github.com/sghctoma/bOOkp
-Great Thanks
-"""
-
-import argparse
-import atexit
-import html
-import json
-import logging
-import os
-import pickle
-import re
-import time
-import urllib
-from http.cookies import SimpleCookie
-
-import requests
-import urllib3
-from faker import Faker
-from requests.adapters import HTTPAdapter
-
-from dedrm import MobiBook, get_pid_list
-
-try:
- import browser_cookie3
-except ModuleNotFoundError:
- print("not found browser_cookie3 here, you should use --cookie command")
-
-logger = logging.getLogger("kindle")
-fh = logging.FileHandler(".error_books.log")
-fh.setLevel(logging.ERROR)
-logger.addHandler(fh)
-
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-DEFAULT_OUT_DIR = "DOWNLOADS"
-DEFAULT_OUT_DEDRM_DIR = "DEDRMS"
-DEFAULT_SESSION_FILE = ".kindle_session"
-
-
-KINDLE_HEADER = {
- "User-Agent": Faker().user_agent(),
-}
-
-CONTENT_TYPES = {
- "EBOK": "Ebook",
- "PDOC": "KindlePDoc",
-}
-
-KINDLE_URLS = {
- "cn": {
- "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll",
- "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN",
- "payload": "https://www.amazon.cn/hz/mycd/ajax",
- "insights": "https://www.amazon.cn/kindle/reading/insights/data",
- "book_url": "https://www.amazon.cn/dp/{book_id}",
- },
- "jp": {
- "bookall": "https://www.amazon.jp/hz/mycd/myx#/home/content/booksAll",
- "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
- "payload": "https://www.amazon.co.jp/hz/mycd/ajax",
- "insights": "https://www.amazon.co.jp/kindle/reading/insights/data",
- "book_url": "https://www.amazon.co.jp/dp/{book_id}",
- },
- "de": {
- "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll",
- "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
- "payload": "https://www.amazon.de/hz/mycd/ajax",
- "insights": "https://www.amazon.de/kindle/reading/insights/data",
- "book_url": "https://www.amazon.de/dp/{book_id}",
- },
- "com": {
- "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll",
- "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
- "payload": "https://www.amazon.com/hz/mycd/ajax",
- "insights": "https://www.amazon.com/kindle/reading/insights/data",
- "book_url": "https://www.amazon.com/dp/{book_id}",
- },
-}
-
-# for kindle stats
-GITHUB_README_COMMENTS = (
- "(\n)(.*)(\n)"
-)
-MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n"
-MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n \
-- I pushed {pdocs_len} docks\n \
-- My first book is {first_book_title}, bought on {first_book_bought_date}\n \
-- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n"
-
-KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
-KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"
-
-
-def replace_readme_comments(file_name, comment_str, comments_name):
- with open(file_name, "r+", encoding="UTF-8") as f:
- text = f.read()
- # regrex sub from github readme comments
- text = re.sub(
- GITHUB_README_COMMENTS.format(name=comments_name),
- r"\1{}\n\3".format(comment_str),
- text,
- flags=re.DOTALL,
- )
- f.seek(0)
- f.write(text)
- f.truncate()
-
-
-class Kindle:
- def __init__(
- self,
- csrf_token=None,
- domain="cn",
- out_dir=DEFAULT_OUT_DIR,
- out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
- cut_length=100,
- session_file=DEFAULT_SESSION_FILE,
- ):
- self.urls = KINDLE_URLS[domain]
- self._csrf_token = csrf_token
- self.total_to_download = 0
- self.out_dir = out_dir
- self.out_dedrm_dir = out_dedrm_dir
- self.dedrm = False
- self.cut_length = cut_length
- self.not_done = False
- self.session_file = session_file
- self.session = self.make_session()
- self.is_browser_cookie = False
- self.to_resolve_duplicate_names = False
- self.books_info_dict = {}
- self.file_type_list = ["EBOOK", "PDOC"]
- atexit.register(self.dump_session)
-
- def set_cookie(self, cookiejar):
- if not cookiejar:
- raise Exception("Please make sure your amazon cookie is right")
- self.session.cookies.clear()
- self.session.cookies.update(cookiejar)
-
- def set_cookie_from_string(self, cookie_string):
- cj = self._parse_kindle_cookie(cookie_string)
- self.set_cookie(cj)
-
- def dump_session(self):
- with open(self.session_file, "wb") as f:
- pickle.dump(self.session, f)
-
- @property
- def csrf_token(self):
- if not self._csrf_token:
- self._csrf_token = self._get_csrf_token()
- return self._csrf_token
-
- @csrf_token.setter
- def csrf_token(self, csrf_token):
- self._csrf_token = csrf_token
-
- def ensure_session_cookie(self):
- if not self.session.cookies:
- logger.debug("No cookie found, trying to load from browsers")
- try:
- self.set_cookie(browser_cookie3.load(domain_name="amazon"))
- except:
- print("not found browser_cookie3 here, you should use --cookie command")
-
- @staticmethod
- def _parse_kindle_cookie(kindle_cookie):
- cookie = SimpleCookie()
- cookie.load(kindle_cookie)
- cookies_dict = {}
- cookiejar = None
- for key, morsel in cookie.items():
- cookies_dict[key] = morsel.value
- cookiejar = requests.utils.cookiejar_from_dict(
- cookies_dict, cookiejar=None, overwrite=True
- )
- return cookiejar
-
- def _get_csrf_token(self):
- """
- TODO: I do not know why I have to get csrf token in the page not in this way
- maybe figure out why in the future
- """
- r = self.session.get(self.urls["bookall"])
- match = re.search(r'var csrfToken = "(.*)";', r.text)
- if not match:
- self.revoke_cookie_token(open_page=self.is_browser_cookie)
- raise Exception(
- "Can't get the csrf token, "
- f"please refresh the page at {self.urls['bookall']} and retry"
- )
- return match.group(1)
-
- def refresh_browser_cookie(self):
- import webbrowser
-
- try:
- webbrowser.open(self.urls["bookall"])
- except Exception:
- pass
-
- def revoke_cookie_token(self, open_page=False):
- # help user open it directly.
- logger.info(
- "Opening the url to get cookie...You can wait for the page to finish loading and retry"
- )
- self._csrf_token = None # reset the token
- # clear the cookies so the next time it can be reloaded from the browsers
- self.session.cookies.clear()
- if open_page:
- self.refresh_browser_cookie()
-
- def ensure_cookie_token(self):
- if not self._csrf_token:
- if not self.session.cookies:
- self.refresh_browser_cookie()
- self.ensure_session_cookie()
- self._csrf_token = self._get_csrf_token()
- logger.debug(
- f"session-id: { self.session.cookies.get_dict().get('session-id') }"
- )
-
- def make_session(self):
- if os.path.exists(self.session_file):
- with open(self.session_file, "rb") as f:
- session = pickle.load(f)
- else:
- session = requests.Session()
- session.headers.update(KINDLE_HEADER)
- session.mount(
- # will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for
- # (413, 429, 503) statuses
- "https://",
- HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)),
- )
-
- logger.debug(f"user-agent: { session.headers.get('User-Agent') }")
- return session
-
- def get_devices(self):
- """
- This method must be called before each download, so we ensure
- the session cookies before it is called
- """
- self.ensure_cookie_token()
-
- payload = {"param": {"GetDevices": {}}}
- r = self.session.post(
- self.urls["payload"],
- data={
- "data": json.dumps(payload),
- "csrfToken": self.csrf_token,
- },
- )
- r.raise_for_status()
- devices = r.json()
- if devices.get("error"):
- self.revoke_cookie_token(open_page=True)
- raise Exception(
- f"Error: {devices.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie"
- )
- devices = r.json()["GetDevices"]["devices"]
- # sleep get device first time.
- logger.info("Amazon open their bot check will sleep 3s")
- time.sleep(3)
- if not devices:
- raise Exception("No devices are bound to this account")
- return [device for device in devices if "deviceSerialNumber" in device]
-
- def get_all_books(self, start_index=0, filetype="EBOK"):
- """
- TODO: refactor this function
- """
- # some info
- if filetype == "PDOC":
- logger.info(
- "It will take some time to get all PDOC books list, please wait"
- )
- startIndex = start_index
- batchSize = 100
- payload = {
- "param": {
- "OwnershipData": {
- "sortOrder": "DESCENDING",
- "sortIndex": "DATE",
- "startIndex": startIndex,
- "batchSize": batchSize,
- "contentType": CONTENT_TYPES[filetype],
- "itemStatus": ["Active"],
- }
- }
- }
-
- if filetype == "EBOK":
- payload["param"]["OwnershipData"].update(
- {
- "originType": ["Purchase"],
- }
- )
- else:
- batchSize = 18
- payload["param"]["OwnershipData"].update(
- {
- "batchSize": batchSize,
- "isExtendedMYK": False,
- }
- )
-
- books = []
- ### added by yihong0618 2022.06.27
- ### this ugly code is for amazon open their bot check
- ### if the bot check close
- ### will delete the try and try code
- break_times = 0
- while True:
- # anyway sleep 0.5
- time.sleep(0.5)
- r = self.session.post(
- self.urls["payload"],
- data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
- )
- # try three times for bot check
- if r.status_code == 503:
- # sleep and try again
- sleep_seconds = 5 + 2 * break_times
- time.sleep(sleep_seconds)
- logger.info(
- f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}"
- )
- if break_times < 7:
- break_times += 1
- r = self.session.post(
- self.urls["payload"],
- data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
- )
- if not r.ok:
- if r.status_code == 503:
- time.sleep(sleep_seconds)
- logger.info(
- f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}"
- )
- logger.info(f"Next time fail will break the loop")
- r = self.session.post(
- self.urls["payload"],
- data={
- "data": json.dumps(payload),
- "csrfToken": self.csrf_token,
- },
- )
- break_times += 1
- if not r.ok:
- # amazon limit this api
- if startIndex == 0:
- logger.error(
- "Amazon api limit when this download done.\n Please run it again`"
- )
- else:
- self.not_done = True
- logger.error(
- "Amazon api limit when this download done.\n You can add command `--resume-from %s`",
- startIndex,
- )
- break
- result = r.json()
- if not result.get("success", True):
- logger.error("get all books error: %s", result.get("error"))
- break
- items = result["OwnershipData"]["items"]
- for item in items:
- if filetype == "PDOC":
- item["title"] = html.unescape(item["title"])
- item["authors"] = html.unescape(item.pop("author", ""))
- if item.get("readStatus", "") == "READ":
- self.books_info_dict[item["asin"]] = item
-
- books.extend(items)
- if not self.total_to_download:
- self.total_to_download = result["OwnershipData"]["numberOfItems"]
-
- if result["OwnershipData"]["hasMoreItems"]:
- startIndex += batchSize
- payload["param"]["OwnershipData"]["startIndex"] = startIndex
- else:
- break
- return books
-
- def _get_reading_stats(self):
- insights_url = self.urls["insights"]
- r = self.session.get(insights_url)
- if r.ok:
- return r.json()
- logger.error(f"Something is wrong get the stats data url: {insights_url}")
- raise Exception(f"Something is wrong get the stats data url: {insights_url}")
-
- def _make_one_book_stats_info(self, book_info):
- book_url = self.urls["book_url"]
- asin = book_info["asin"]
- book = self.books_info_dict.get(asin)
- book_title = book.get("title", "")
- # filter the brackets in the book title
- book_title = re.sub(
- r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title
- )
- book_title = book_title.replace(" ", "")
- if book.get("category", "") == "KindleEBook":
- book_url = book_url.format(book_id=asin)
- book_title = f"[{book_title}]({book_url})"
- book_authors = book.get("authors")
- if len(book_authors) > 10:
- book_authors = ",".join(book_authors.split(",")[:2]) + "..."
- # only keep date
- read = book_info.get("date_read")[:10]
- acquired = (
- book.get("acquiredDate", "")
- .replace("年", "-")
- .replace("月", "-")
- .replace("日", "")
- )
- return book_title, book_authors, acquired, read
-
- def make_kindle_stats_readme(self):
- reading_stats = self._get_reading_stats()
- read_list = reading_stats.get("goal_info", {}).get("titles_read")
- ebooks = self.get_all_books(filetype="EBOK")
- pdocs = self.get_all_books(filetype="PDOC")
- first_ebook, first_pdoc = ebooks[-1], pdocs[-1]
- print(len(self.books_info_dict.keys()), first_ebook, first_pdoc)
- print(read_list)
-
- s = MY_KINDLE_STATS_INFO_HEAD
- kindle_stats_str = MY_KINDLE_STATS_INFO.format(
- books_len=len(ebooks),
- pdocs_len=len(pdocs),
- first_book_title=first_ebook["title"],
- first_book_bought_date=first_ebook["acquiredDate"],
- first_doc_title=first_pdoc["title"],
- first_doc_push_date=first_pdoc["acquiredDate"],
- )
- s += kindle_stats_str
- s += KINDLE_TABLE_HEAD
- index = 1
- for book_info in read_list:
- book_title, book_authors, acquired, read = self._make_one_book_stats_info(
- book_info
- )
- s += KINDLE_STAT_TEMPLATE.format(
- id=str(index),
- title=book_title,
- authors=book_authors,
- acquired=acquired,
- read=read,
- )
- index += 1
- replace_readme_comments("my_kindle_stats.md", s, "my_kindle")
-
- def download_one_book(self, book, device, index, filetype="EBOK"):
- title = book["title"]
- asin = book["asin"]
- try:
- download_url = self.urls["download"].format(
- filetype,
- asin,
- device["deviceSerialNumber"],
- device["deviceType"],
- device["customerId"],
- )
- r = self.session.get(download_url, verify=False, stream=True)
- r.raise_for_status()
- name = re.findall(
- r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"]
- )[0]
- name = urllib.parse.unquote(name)
- _, extname = os.path.splitext(name)
- name = title + extname
- name = re.sub(r'[\\/:*?"<>|]', "_", name)
-
- ##### if you have many duplicate name books #####
- if self.to_resolve_duplicate_names:
- name = f"{asin}_{name}"
- if len(name) > self.cut_length:
- name = name[: self.cut_length - 5] + name[-5:]
- total_size = r.headers["Content-length"]
-
- out = os.path.join(self.out_dir, name)
- out_dedrm = os.path.join(self.out_dedrm_dir, name)
- logger.info(
- f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes"
- )
- with open(out, "wb") as f:
- for chunk in r.iter_content(chunk_size=512):
- f.write(chunk)
- logger.info(f"{name} downloaded")
- # for dedrm
- if self.dedrm:
- try:
- mb = MobiBook(out)
- md1, md2 = mb.get_pid_meta_info()
- totalpids = get_pid_list(md1, md2, [self.device_serial_number], [])
- totalpids = list(set(totalpids))
- mb.make_drm_file(totalpids, out_dedrm)
- except Exception as e:
- logger.error("Dedrm failed for %s: %s", name, e)
- pass
- except Exception as e:
- logger.error(str(e))
- logger.error(f"Title: {title}, Asin: {asin} download failed")
-
- def download_books(self, start_index=0, filetype="EBOK"):
- # use default device
- device = self.get_devices()[0]
- self.device_serial_number = device["deviceSerialNumber"]
-
- logger.info(
- f"Using default device serial Number: {device['deviceSerialNumber']}"
- )
- books = self.get_all_books(filetype=filetype, start_index=start_index)
- if start_index > 0:
- print(f"resuming the download {start_index + 1}/{self.total_to_download}")
- index = start_index
- for book in books:
- self.download_one_book(book, device, index, filetype)
- index += 1
- if self.not_done:
- logger.error(
- f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time"
- )
- else:
- if not self.dedrm:
- logger.info(
- "\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools "
- "(https://github.com/apprenticeharper/DeDRM_tools)\n"
- "with the following serial number to remove DRM: "
- + device["deviceSerialNumber"]
- )
- else:
- logger.info(
- "All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`"
- )
- with open(os.path.join(self.out_dir, "key.txt"), "w") as f:
- f.write(f"Key is: {device['deviceSerialNumber']}")
+from kindle_download_helper import main
if __name__ == "__main__":
-
- logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))
-
- logger.addHandler(logging.StreamHandler())
- parser = argparse.ArgumentParser()
- parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")
-
- cookie_group = parser.add_mutually_exclusive_group()
- cookie_group.add_argument(
- "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
- )
- cookie_group.add_argument(
- "--cookie-file", dest="cookie_file", default="", help="load cookie local file"
- )
-
- parser.add_argument(
- "--cn",
- dest="domain",
- action="store_const",
- const="cn",
- default="com",
- help="if your account is an amazon.cn account",
- )
- parser.add_argument(
- "--jp",
- dest="domain",
- action="store_const",
- const="jp",
- default="com",
- help="if your account is an amazon.jp account",
- )
- parser.add_argument(
- "--de",
- dest="domain",
- action="store_const",
- const="de",
- default="com",
- help="if your account is an amazon.de account",
- )
- parser.add_argument(
- "--resume-from",
- dest="index",
- type=int,
- default=1,
- help="resume from the index if download failed",
- )
- parser.add_argument(
- "--cut-length",
- dest="cut_length",
- type=int,
- default=100,
- help="truncate the file name",
- )
- parser.add_argument(
- "-o", "--outdir", default=DEFAULT_OUT_DIR, help="dwonload output dir"
- )
- parser.add_argument(
- "-od",
- "--outdedrmdir",
- default=DEFAULT_OUT_DEDRM_DIR,
- help="dwonload output dedrm dir",
- )
- parser.add_argument(
- "-s",
- "--session-file",
- default=DEFAULT_SESSION_FILE,
- help="The reusable session dump file",
- )
- parser.add_argument(
- "--pdoc",
- dest="filetype",
- action="store_const",
- const="PDOC",
- default="EBOK",
- help="to download personal documents or ebook",
- )
- parser.add_argument(
- "--resolve_duplicate_names",
- dest="resolve_duplicate_names",
- action="store_true",
- help="Resolve duplicate names files to download",
- )
- parser.add_argument(
- "--readme",
- dest="readme",
- action="store_true",
- help="If you want to generate kindle readme stats",
- )
- parser.add_argument(
- "--dedrm",
- dest="dedrm",
- action="store_true",
- help="If you want to `dedrm` directly",
- )
-
- parser.add_argument(
- "--list",
- dest="list_only",
- action="store_true",
- help="just list books/pdoc, not to download",
- )
-
- options = parser.parse_args()
-
- if not os.path.exists(options.outdir):
- os.makedirs(options.outdir)
- # for dedrm
- if not os.path.exists(options.outdedrmdir):
- os.makedirs(options.outdedrmdir)
- kindle = Kindle(
- options.csrf_token,
- options.domain,
- options.outdir,
- options.outdedrmdir,
- options.cut_length,
- session_file=options.session_file,
- )
- # other args
- kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
- kindle.dedrm = options.dedrm
-
- if options.cookie_file:
- with open(options.cookie_file, "r") as f:
- kindle.set_cookie_from_string(f.read())
- elif options.cookie:
- kindle.set_cookie_from_string(options.cookie)
- else:
- kindle.is_browser_cookie = True
-
- if options.list_only:
- kindle.get_devices()
- print(
- json.dumps(
- kindle.get_all_books(filetype=options.filetype),
- indent=4,
- ensure_ascii=False,
- )
- )
- exit()
-
- if options.readme:
- # generate readme stats
- kindle.make_kindle_stats_readme()
- else:
- kindle.download_books(start_index=options.index - 1, filetype=options.filetype)
+ main()
diff --git a/kindle.ui b/kindle.ui
index a172b4f..6fb869b 100644
--- a/kindle.ui
+++ b/kindle.ui
@@ -340,7 +340,7 @@ hr { height: 1px; border-width: 0; }
-
- License: MIT
+ License: GPL V3
diff --git a/kindle_download_helper.py b/kindle_download_helper.py
index 43e9294..bd5cc3f 100644
--- a/kindle_download_helper.py
+++ b/kindle_download_helper.py
@@ -7,7 +7,7 @@ from typing import NamedTuple
from PySide6 import QtCore, QtGui, QtWidgets
-import kindle
+from kindle_download_helper import kindle as kindle
from ui_kindle import Ui_MainDialog
logger = logging.getLogger("kindle")
diff --git a/kindle_download_helper/__init__.py b/kindle_download_helper/__init__.py
new file mode 100644
index 0000000..be8959c
--- /dev/null
+++ b/kindle_download_helper/__init__.py
@@ -0,0 +1,2 @@
+from kindle_download_helper.cli import main
+from kindle_download_helper import kindle
diff --git a/kindle_download_helper/__main__.py b/kindle_download_helper/__main__.py
new file mode 100644
index 0000000..6fa7a56
--- /dev/null
+++ b/kindle_download_helper/__main__.py
@@ -0,0 +1,4 @@
+from kindle_download_helper.cli import main
+
+if __name__ == "__main__":
+    main()
diff --git a/kindle_download_helper/cli.py b/kindle_download_helper/cli.py
new file mode 100644
index 0000000..91ecb14
--- /dev/null
+++ b/kindle_download_helper/cli.py
@@ -0,0 +1,182 @@
+import argparse
+import json
+import logging
+import os
+
+import urllib3
+
+from kindle_download_helper.config import (
+    DEFAULT_OUT_DEDRM_DIR,
+    DEFAULT_OUT_DIR,
+    DEFAULT_SESSION_FILE,
+)
+from kindle_download_helper.kindle import Kindle
+
+# The ".error_books.log" FileHandler is already attached to this same "kindle"
+# logger when kindle_download_helper.kindle is imported above; attaching a
+# second one here would write every error record to the file twice.
+logger = logging.getLogger("kindle")
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+def main():
+    """Parse the command line, then list, summarise or download Kindle content.
+
+    The output directories are created when missing. Cookies are taken from
+    ``--cookie-file`` or ``--cookie``; with neither, the Kindle instance is
+    flagged to use browser cookies. ``--list`` prints the book list as JSON and
+    returns, ``--readme`` generates the reading stats, and otherwise books are
+    downloaded starting at ``--resume-from`` (1-based, clamped to the first).
+    """
+    logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))
+
+    logger.addHandler(logging.StreamHandler())
+    parser = argparse.ArgumentParser()
+    parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")
+
+    cookie_group = parser.add_mutually_exclusive_group()
+    cookie_group.add_argument(
+        "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
+    )
+    cookie_group.add_argument(
+        "--cookie-file", dest="cookie_file", default="", help="load cookie local file"
+    )
+
+    parser.add_argument(
+        "--cn",
+        dest="domain",
+        action="store_const",
+        const="cn",
+        default="com",
+        help="if your account is an amazon.cn account",
+    )
+    parser.add_argument(
+        "--jp",
+        dest="domain",
+        action="store_const",
+        const="jp",
+        default="com",
+        help="if your account is an amazon.jp account",
+    )
+    parser.add_argument(
+        "--de",
+        dest="domain",
+        action="store_const",
+        const="de",
+        default="com",
+        help="if your account is an amazon.de account",
+    )
+    parser.add_argument(
+        "--resume-from",
+        dest="index",
+        type=int,
+        default=1,
+        help="resume from the index if download failed",
+    )
+    parser.add_argument(
+        "--cut-length",
+        dest="cut_length",
+        type=int,
+        default=100,
+        help="truncate the file name",
+    )
+    parser.add_argument(
+        "-o", "--outdir", default=DEFAULT_OUT_DIR, help="download output dir"
+    )
+    parser.add_argument(
+        "-od",
+        "--outdedrmdir",
+        default=DEFAULT_OUT_DEDRM_DIR,
+        help="download output dedrm dir",
+    )
+    parser.add_argument(
+        "-s",
+        "--session-file",
+        default=DEFAULT_SESSION_FILE,
+        help="The reusable session dump file",
+    )
+    parser.add_argument(
+        "--pdoc",
+        dest="filetype",
+        action="store_const",
+        const="PDOC",
+        default="EBOK",
+        help="to download personal documents or ebook",
+    )
+    parser.add_argument(
+        "--resolve_duplicate_names",
+        dest="resolve_duplicate_names",
+        action="store_true",
+        help="Resolve duplicate names files to download",
+    )
+    parser.add_argument(
+        "--readme",
+        dest="readme",
+        action="store_true",
+        help="If you want to generate kindle readme stats",
+    )
+    parser.add_argument(
+        "--dedrm",
+        dest="dedrm",
+        action="store_true",
+        help="If you want to `dedrm` directly",
+    )
+
+    parser.add_argument(
+        "--list",
+        dest="list_only",
+        action="store_true",
+        help="just list books/pdoc, not to download",
+    )
+
+    options = parser.parse_args()
+
+    # exist_ok=True avoids the check-then-create race of exists() + makedirs()
+    os.makedirs(options.outdir, exist_ok=True)
+    # for dedrm
+    os.makedirs(options.outdedrmdir, exist_ok=True)
+    kindle = Kindle(
+        options.csrf_token,
+        options.domain,
+        options.outdir,
+        options.outdedrmdir,
+        options.cut_length,
+        session_file=options.session_file,
+    )
+    # other args
+    kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
+    kindle.dedrm = options.dedrm
+
+    if options.cookie_file:
+        with open(options.cookie_file, "r") as f:
+            kindle.set_cookie_from_string(f.read())
+    elif options.cookie:
+        kindle.set_cookie_from_string(options.cookie)
+    else:
+        kindle.is_browser_cookie = True
+
+    if options.list_only:
+        kindle.get_devices()
+        print(
+            json.dumps(
+                kindle.get_all_books(filetype=options.filetype),
+                indent=4,
+                ensure_ascii=False,
+            )
+        )
+        # Return instead of calling exit(): that helper is injected by the
+        # `site` module and is not guaranteed to exist in every interpreter.
+        return
+
+    if options.readme:
+        # generate readme stats
+        kindle.make_kindle_stats_readme()
+    else:
+        # --resume-from is 1-based; clamp so 0 or a negative value starts at 0
+        start_index = max(options.index - 1, 0)
+        kindle.download_books(start_index=start_index, filetype=options.filetype)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/kindle_download_helper/config.py b/kindle_download_helper/config.py
new file mode 100644
index 0000000..1e940bd
--- /dev/null
+++ b/kindle_download_helper/config.py
@@ -0,0 +1,56 @@
+from faker import Faker
+
+DEFAULT_OUT_DIR = "DOWNLOADS"
+DEFAULT_OUT_DEDRM_DIR = "DEDRMS"
+DEFAULT_SESSION_FILE = ".kindle_session"
+
+
+KINDLE_HEADER = {
+ "User-Agent": Faker().user_agent(),
+}
+
+CONTENT_TYPES = {
+ "EBOK": "Ebook",
+ "PDOC": "KindlePDoc",
+}
+
+KINDLE_URLS = {
+ "cn": {
+ "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll",
+ "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN",
+ "payload": "https://www.amazon.cn/hz/mycd/ajax",
+ "insights": "https://www.amazon.cn/kindle/reading/insights/data",
+ "book_url": "https://www.amazon.cn/dp/{book_id}",
+ },
+ "jp": {
+ "bookall": "https://www.amazon.jp/hz/mycd/myx#/home/content/booksAll",
+ "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
+ "payload": "https://www.amazon.co.jp/hz/mycd/ajax",
+ "insights": "https://www.amazon.co.jp/kindle/reading/insights/data",
+ "book_url": "https://www.amazon.co.jp/dp/{book_id}",
+ },
+ "de": {
+ "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll",
+ "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
+ "payload": "https://www.amazon.de/hz/mycd/ajax",
+ "insights": "https://www.amazon.de/kindle/reading/insights/data",
+ "book_url": "https://www.amazon.de/dp/{book_id}",
+ },
+ "com": {
+ "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll",
+ "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
+ "payload": "https://www.amazon.com/hz/mycd/ajax",
+ "insights": "https://www.amazon.com/kindle/reading/insights/data",
+ "book_url": "https://www.amazon.com/dp/{book_id}",
+ },
+}
+
+# for kindle stats; `{name}` picks the START_SECTION/END_SECTION marker pair whose body is replaced
+GITHUB_README_COMMENTS = (
+    "(<!--START_SECTION:{name}-->\n)(.*)(<!--END_SECTION:{name}-->\n)"
+)
+MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n"
+MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n- I pushed {pdocs_len} docs\n- My first book is {first_book_title}, bought on {first_book_bought_date}\n- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n"
+
+KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
+KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"
diff --git a/dedrm/__init__.py b/kindle_download_helper/dedrm/__init__.py
similarity index 100%
rename from dedrm/__init__.py
rename to kindle_download_helper/dedrm/__init__.py
diff --git a/dedrm/k4mobidedrm.py b/kindle_download_helper/dedrm/k4mobidedrm.py
similarity index 100%
rename from dedrm/k4mobidedrm.py
rename to kindle_download_helper/dedrm/k4mobidedrm.py
diff --git a/dedrm/kgenpids.py b/kindle_download_helper/dedrm/kgenpids.py
similarity index 100%
rename from dedrm/kgenpids.py
rename to kindle_download_helper/dedrm/kgenpids.py
diff --git a/dedrm/mobidedrm.py b/kindle_download_helper/dedrm/mobidedrm.py
similarity index 100%
rename from dedrm/mobidedrm.py
rename to kindle_download_helper/dedrm/mobidedrm.py
diff --git a/kindle_download_helper/kindle.py b/kindle_download_helper/kindle.py
new file mode 100644
index 0000000..6437b16
--- /dev/null
+++ b/kindle_download_helper/kindle.py
@@ -0,0 +1,499 @@
+"""
+Note some download code from: https://github.com/sghctoma/bOOkp
+Great Thanks
+"""
+
+import atexit
+import html
+import json
+import logging
+import os
+import pickle
+import re
+import time
+import urllib
+from http.cookies import SimpleCookie
+
+import requests
+import urllib3
+from requests.adapters import HTTPAdapter
+
+from kindle_download_helper.dedrm import MobiBook, get_pid_list
+from kindle_download_helper.config import (
+ KINDLE_URLS,
+ DEFAULT_OUT_DIR,
+ DEFAULT_SESSION_FILE,
+ DEFAULT_OUT_DEDRM_DIR,
+ CONTENT_TYPES,
+ KINDLE_STAT_TEMPLATE,
+)
+from kindle_download_helper.config import (
+ MY_KINDLE_STATS_INFO_HEAD,
+ KINDLE_HEADER,
+ MY_KINDLE_STATS_INFO,
+ KINDLE_TABLE_HEAD,
+)
+from kindle_download_helper.utils import replace_readme_comments
+
+try:
+ import browser_cookie3
+except ModuleNotFoundError:
+ print("not found browser_cookie3 here, you should use --cookie command")
+
+logger = logging.getLogger("kindle")
+fh = logging.FileHandler(".error_books.log")
+fh.setLevel(logging.ERROR)
+logger.addHandler(fh)
+
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+
+class Kindle:
+ def __init__(
+ self,
+ csrf_token=None,
+ domain="cn",
+ out_dir=DEFAULT_OUT_DIR,
+ out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
+ cut_length=100,
+ session_file=DEFAULT_SESSION_FILE,
+ ):
+ self.urls = KINDLE_URLS[domain]
+ self._csrf_token = csrf_token
+ self.total_to_download = 0
+ self.out_dir = out_dir
+ self.out_dedrm_dir = out_dedrm_dir
+ self.dedrm = False
+ self.cut_length = cut_length
+ self.not_done = False
+ self.session_file = session_file
+ self.session = self.make_session()
+ self.is_browser_cookie = False
+ self.to_resolve_duplicate_names = False
+ self.books_info_dict = {}
+ self.file_type_list = ["EBOOK", "PDOC"]
+ atexit.register(self.dump_session)
+
+ def set_cookie(self, cookiejar):
+ if not cookiejar:
+ raise Exception("Please make sure your amazon cookie is right")
+ self.session.cookies.clear()
+ self.session.cookies.update(cookiejar)
+
+ def set_cookie_from_string(self, cookie_string):
+ cj = self._parse_kindle_cookie(cookie_string)
+ self.set_cookie(cj)
+
+ def dump_session(self):
+ with open(self.session_file, "wb") as f:
+ pickle.dump(self.session, f)
+
+ @property
+ def csrf_token(self):
+ if not self._csrf_token:
+ self._csrf_token = self._get_csrf_token()
+ return self._csrf_token
+
+ @csrf_token.setter
+ def csrf_token(self, csrf_token):
+ self._csrf_token = csrf_token
+
+ def ensure_session_cookie(self):
+ if not self.session.cookies:
+ logger.debug("No cookie found, trying to load from browsers")
+ try:
+ self.set_cookie(browser_cookie3.load(domain_name="amazon"))
+ except:
+ print("not found browser_cookie3 here, you should use --cookie command")
+
+ @staticmethod
+ def _parse_kindle_cookie(kindle_cookie):
+ cookie = SimpleCookie()
+ cookie.load(kindle_cookie)
+ cookies_dict = {}
+ cookiejar = None
+ for key, morsel in cookie.items():
+ cookies_dict[key] = morsel.value
+ cookiejar = requests.utils.cookiejar_from_dict(
+ cookies_dict, cookiejar=None, overwrite=True
+ )
+ return cookiejar
+
+    def _get_csrf_token(self):
+        """
+        Scrape the csrf token from the "bookall" page, raise when it is not there.
+        TODO: I do not know why I have to get csrf token in the page not in this way
+        maybe figure out why in the future
+        """
+        r = self.session.get(self.urls["bookall"])
+        match = re.search(r'var csrfToken = "(.*?)";', r.text)  # stop at first `";`
+        if not match:
+            self.revoke_cookie_token(open_page=self.is_browser_cookie)
+            raise Exception(
+                f"Can't get the csrf token, please refresh the page at {self.urls['bookall']} and retry"
+            )
+        return match.group(1)
+
+ def refresh_browser_cookie(self):
+ import webbrowser
+
+ try:
+ webbrowser.open(self.urls["bookall"])
+ except Exception:
+ pass
+
+ def revoke_cookie_token(self, open_page=False):
+ # help user open it directly.
+ logger.info(
+ "Opening the url to get cookie...You can wait for the page to finish loading and retry"
+ )
+ self._csrf_token = None # reset the token
+ # clear the cookies so the next time it can be reloaded from the browsers
+ self.session.cookies.clear()
+ if open_page:
+ self.refresh_browser_cookie()
+
+ def ensure_cookie_token(self):
+ if not self._csrf_token:
+ if not self.session.cookies:
+ self.refresh_browser_cookie()
+ self.ensure_session_cookie()
+ self._csrf_token = self._get_csrf_token()
+ logger.debug(
+ f"session-id: { self.session.cookies.get_dict().get('session-id') }"
+ )
+
+ def make_session(self):
+ if os.path.exists(self.session_file):
+ with open(self.session_file, "rb") as f:
+ session = pickle.load(f)
+ else:
+ session = requests.Session()
+ session.headers.update(KINDLE_HEADER)
+ session.mount(
+ # will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for
+ # (413, 429, 503) statuses
+ "https://",
+ HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)),
+ )
+
+ logger.debug(f"user-agent: { session.headers.get('User-Agent') }")
+ return session
+
+    def get_devices(self):
+        """
+        Return the devices having a "deviceSerialNumber"; must be called before each
+        download, so the cookie and csrf token are ensured here first.
+        """
+        self.ensure_cookie_token()
+
+        payload = {"param": {"GetDevices": {}}}
+        r = self.session.post(
+            self.urls["payload"],
+            data={
+                "data": json.dumps(payload),
+                "csrfToken": self.csrf_token,
+            },
+        )
+        r.raise_for_status()
+        devices = r.json()
+        if devices.get("error"):
+            self.revoke_cookie_token(open_page=True)
+            raise Exception(
+                f"Error: {devices.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie"
+            )
+        devices = r.json()["GetDevices"]["devices"]
+        # pause after the devices request because of amazon's bot check
+        logger.info("Amazon open their bot check will sleep 3s")
+        time.sleep(3)
+        if not devices:
+            raise Exception("No devices are bound to this account")
+        return [device for device in devices if "deviceSerialNumber" in device]
+
+ def get_all_books(self, start_index=0, filetype="EBOK"):
+ """
+ TODO: refactor this function
+ """
+ # some info
+ if filetype == "PDOC":
+ logger.info(
+ "It will take some time to get all PDOC books list, please wait"
+ )
+ startIndex = start_index
+ batchSize = 100
+ payload = {
+ "param": {
+ "OwnershipData": {
+ "sortOrder": "DESCENDING",
+ "sortIndex": "DATE",
+ "startIndex": startIndex,
+ "batchSize": batchSize,
+ "contentType": CONTENT_TYPES[filetype],
+ "itemStatus": ["Active"],
+ }
+ }
+ }
+
+ if filetype == "EBOK":
+ payload["param"]["OwnershipData"].update(
+ {
+ "originType": ["Purchase"],
+ }
+ )
+ else:
+ batchSize = 18
+ payload["param"]["OwnershipData"].update(
+ {
+ "batchSize": batchSize,
+ "isExtendedMYK": False,
+ }
+ )
+
+ books = []
+ ### added by yihong0618 2022.06.27
+ ### this ugly code is for amazon open their bot check
+ ### if the bot check close
+ ### will delete the try and try code
+ break_times = 0
+ while True:
+ # anyway sleep 0.5
+ time.sleep(0.5)
+ r = self.session.post(
+ self.urls["payload"],
+ data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
+ )
+ # try three times for bot check
+ if r.status_code == 503:
+ # sleep and try again
+ sleep_seconds = 5 + 2 * break_times
+ time.sleep(sleep_seconds)
+ logger.info(
+ f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}"
+ )
+ if break_times < 7:
+ break_times += 1
+ r = self.session.post(
+ self.urls["payload"],
+ data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
+ )
+ if not r.ok:
+ if r.status_code == 503:
+ time.sleep(sleep_seconds)
+ logger.info(
+ f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}"
+ )
+ logger.info(f"Next time fail will break the loop")
+ r = self.session.post(
+ self.urls["payload"],
+ data={
+ "data": json.dumps(payload),
+ "csrfToken": self.csrf_token,
+ },
+ )
+ break_times += 1
+ if not r.ok:
+ # amazon limit this api
+ if startIndex == 0:
+ logger.error(
+ "Amazon api limit when this download done.\n Please run it again`"
+ )
+ else:
+ self.not_done = True
+ logger.error(
+ "Amazon api limit when this download done.\n You can add command `--resume-from %s`",
+ startIndex,
+ )
+ break
+ result = r.json()
+ if not result.get("success", True):
+ logger.error("get all books error: %s", result.get("error"))
+ break
+ items = result["OwnershipData"]["items"]
+ for item in items:
+ if filetype == "PDOC":
+ item["title"] = html.unescape(item["title"])
+ item["authors"] = html.unescape(item.pop("author", ""))
+ if item.get("readStatus", "") == "READ":
+ self.books_info_dict[item["asin"]] = item
+
+ books.extend(items)
+ self.total_to_download = result["OwnershipData"]["numberOfItems"]
+
+ if result["OwnershipData"]["hasMoreItems"]:
+ startIndex += batchSize
+ payload["param"]["OwnershipData"]["startIndex"] = startIndex
+ else:
+ break
+ return books
+
+ def _get_reading_stats(self):
+ insights_url = self.urls["insights"]
+ r = self.session.get(insights_url)
+ if r.ok:
+ return r.json()
+ logger.error(f"Something is wrong get the stats data url: {insights_url}")
+ raise Exception(f"Something is wrong get the stats data url: {insights_url}")
+
+ def _make_one_book_stats_info(self, book_info):
+ book_url = self.urls["book_url"]
+ asin = book_info["asin"]
+ book = self.books_info_dict.get(asin)
+ if not book:
+ return
+ book_title = book.get("title", "")
+ # filter the brackets in the book title
+ book_title = re.sub(
+ r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title
+ )
+ book_title = book_title.replace(" ", "")
+ if book.get("category", "") == "KindleEBook":
+ book_url = book_url.format(book_id=asin)
+ book_title = f"[{book_title}]({book_url})"
+ book_authors = book.get("authors")
+ if len(book_authors) > 10:
+ book_authors = ",".join(book_authors.split(",")[:2]) + "..."
+ # only keep date
+ read = book_info.get("date_read")[:10]
+ acquired = (
+ book.get("acquiredDate", "")
+ .replace("年", "-")
+ .replace("月", "-")
+ .replace("日", "")
+ )
+ return book_title, book_authors, acquired, read
+
+    def make_kindle_stats_readme(self):
+        """Render the kindle stats and write them into `my_kindle_stats.md`."""
+        ebooks = self.get_all_books(filetype="EBOK")
+        pdocs = self.get_all_books(filetype="PDOC")
+        reading_stats = self._get_reading_stats()
+        # "titles_read" can be missing: fall back to an empty table
+        read_list = reading_stats.get("goal_info", {}).get("titles_read") or []
+        # books are requested sorted by date descending, so the last one is the oldest
+        first_pdoc = pdocs[-1] if pdocs else None
+        first_ebook = ebooks[-1] if ebooks else None
+
+        s = MY_KINDLE_STATS_INFO_HEAD
+        kindle_stats_str = ""
+        if pdocs or ebooks:
+            kindle_stats_str = MY_KINDLE_STATS_INFO.format(
+                books_len=len(ebooks) if ebooks else 0,
+                pdocs_len=len(pdocs) if pdocs else 0,
+                first_book_title=first_ebook["title"] if first_ebook else "",
+                first_book_bought_date=first_ebook["acquiredDate"]
+                if first_ebook
+                else "",
+                first_doc_title=first_pdoc["title"] if first_pdoc else "",
+                first_doc_push_date=first_pdoc["acquiredDate"] if first_pdoc else "",
+            )
+        s += kindle_stats_str
+        s += KINDLE_TABLE_HEAD
+        index = 1
+        for book_info in read_list:
+            # build the row once; a falsy result means the book is unknown, skip it
+            stats_info = self._make_one_book_stats_info(book_info)
+            if not stats_info:
+                continue
+            book_title, book_authors, acquired, read = stats_info
+            s += KINDLE_STAT_TEMPLATE.format(
+                id=str(index),
+                title=book_title,
+                authors=book_authors,
+                acquired=acquired,
+                read=read,
+            )
+            index += 1
+        if not os.path.exists("my_kindle_stats.md"):
+            with open("my_kindle_stats.md", "a") as f:
+                f.write(
+                    """
+
+                    """
+                )
+        replace_readme_comments("my_kindle_stats.md", s, "my_kindle")
+
+ def download_one_book(self, book, device, index, filetype="EBOK"):
+ title = book["title"]
+ asin = book["asin"]
+ try:
+ download_url = self.urls["download"].format(
+ filetype,
+ asin,
+ device["deviceSerialNumber"],
+ device["deviceType"],
+ device["customerId"],
+ )
+ r = self.session.get(download_url, verify=False, stream=True)
+ r.raise_for_status()
+ name = re.findall(
+ r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"]
+ )[0]
+ name = urllib.parse.unquote(name)
+ _, extname = os.path.splitext(name)
+ name = title + extname
+ name = re.sub(r'[\\/:*?"<>|]', "_", name)
+
+ ##### if you have many duplicate name books #####
+ if self.to_resolve_duplicate_names:
+ name = f"{asin}_{name}"
+ if len(name) > self.cut_length:
+ name = name[: self.cut_length - 5] + name[-5:]
+ total_size = r.headers["Content-length"]
+
+ out = os.path.join(self.out_dir, name)
+ out_dedrm = os.path.join(self.out_dedrm_dir, name)
+ logger.info(
+ f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes"
+ )
+ with open(out, "wb") as f:
+ for chunk in r.iter_content(chunk_size=512):
+ f.write(chunk)
+ logger.info(f"{name} downloaded")
+ # for dedrm
+ if self.dedrm:
+ try:
+ mb = MobiBook(out)
+ md1, md2 = mb.get_pid_meta_info()
+ totalpids = get_pid_list(md1, md2, [self.device_serial_number], [])
+ totalpids = list(set(totalpids))
+ mb.make_drm_file(totalpids, out_dedrm)
+ except Exception as e:
+ logger.error("Dedrm failed for %s: %s", name, e)
+ pass
+ except Exception as e:
+ logger.error(str(e))
+ logger.error(f"Title: {title}, Asin: {asin} download failed")
+
+ def download_books(self, start_index=0, filetype="EBOK"):
+ # use default device
+ device = self.get_devices()[0]
+ self.device_serial_number = device["deviceSerialNumber"]
+
+ logger.info(
+ f"Using default device serial Number: {device['deviceSerialNumber']}"
+ )
+ books = self.get_all_books(filetype=filetype, start_index=start_index)
+ if start_index > 0:
+ print(f"resuming the download {start_index + 1}/{self.total_to_download}")
+ index = start_index
+ for book in books:
+ self.download_one_book(book, device, index, filetype)
+ index += 1
+ if self.not_done:
+ logger.error(
+ f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time"
+ )
+ else:
+ if not self.dedrm:
+ logger.info(
+ "\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools "
+ "(https://github.com/apprenticeharper/DeDRM_tools)\n"
+ "with the following serial number to remove DRM: "
+ + device["deviceSerialNumber"]
+ )
+ else:
+ logger.info(
+ "All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`"
+ )
+ with open(os.path.join(self.out_dir, "key.txt"), "w") as f:
+ f.write(f"Key is: {device['deviceSerialNumber']}")
diff --git a/kindle_download_helper/utils.py b/kindle_download_helper/utils.py
new file mode 100644
index 0000000..7ef798d
--- /dev/null
+++ b/kindle_download_helper/utils.py
@@ -0,0 +1,17 @@
+import re
+from kindle_download_helper.config import GITHUB_README_COMMENTS
+
+
+def replace_readme_comments(file_name, comment_str, comments_name):
+    """Rewrite `file_name`, putting `comment_str` in place of the middle group matched by `GITHUB_README_COMMENTS`."""
+    with open(file_name, "r+", encoding="UTF-8") as f:
+        # callable replacement: backslashes in `comment_str` stay literal
+        text = re.sub(
+            GITHUB_README_COMMENTS.format(name=comments_name),
+            lambda m: f"{m.group(1)}{comment_str}\n{m.group(3)}",
+            f.read(),
+            flags=re.DOTALL,
+        )
+        f.seek(0)
+        f.write(text)
+        f.truncate()
diff --git a/my_kindle_stats.md b/my_kindle_stats.md
index 4ceb268..3a1da90 100644
--- a/my_kindle_stats.md
+++ b/my_kindle_stats.md
@@ -1,56 +1,3 @@
-
-## My kindle stats
-- I bought 38 books
- - I pushed 871 docks
- - My first book is 知乎周刊·商业的细节, bought on 2013年9月16日
- - My first doc is 天涯头条:《刘军宁:文明社会与言论自由,事关社会安定和对权力的制约》, bought on 2015年3月16日
-
-| ID | Title | Authors | Acquired | Read |
- | ---- | ---- | ---- | ---- | ---- |
-| 1 | 其主之声 | 斯坦尼斯瓦夫·莱姆 | 2022-5-3 | 2022-05-03 |
-| 2 | 奇鸟行状录 | 村上春树 | 2022-3-22 | 2022-03-22 |
-| 3 | Origin原型機-第03卷 | Boichi | 2021-6-1 | 2022-02-11 |
-| 4 | Origin原型機-第05卷 | Boichi | 2021-6-1 | 2022-02-11 |
-| 5 | Origin原型機-第07卷 | Boichi | 2021-6-1 | 2022-02-11 |
-| 6 | 第一人称单数 | 村上春树 | 2021-11-28 | 2022-02-05 |
-| 7 | 挽救计划 | 安迪·威尔 | 2021-11-18 | 2021-11-18 |
-| 8 | [日本名家小说集](https://www.amazon.cn/dp/B08P8KXYZ1) | 东野圭吾, 伊坂幸太郎... | 2021-5-1 | 2021-11-17 |
-| 9 | 两京十五日 | 马伯庸 | 2021-10-3 | 2021-10-05 |
-| 10 | 绝叫 | 叶真中显 | 2021-8-17 | 2021-08-22 |
-| 11 | 炎拳-第02卷 | 105965398155@vol.moe... | 2020-11-8 | 2021-07-24 |
-| 12 | Origin原型機-第02卷 | Boichi | 2021-6-1 | 2021-06-04 |
-| 13 | Origin原型機-第01卷 | Boichi | 2021-6-1 | 2021-06-03 |
-| 14 | 殺手寓言-第15卷 | 南勝久 | 2021-4-9 | 2021-04-10 |
-| 15 | 殺手寓言-第14卷 | 南勝久 | 2021-4-6 | 2021-04-09 |
-| 16 | 殺手寓言-第13卷 | 南勝久 | 2021-4-6 | 2021-04-09 |
-| 17 | 殺手寓言-第12卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
-| 18 | 殺手寓言-第11卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
-| 19 | 殺手寓言-第10卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
-| 20 | 殺手寓言-第09卷 | 南勝久 | 2021-4-6 | 2021-04-07 |
-| 21 | 殺手寓言-第08卷 | 南勝久 | 2021-4-6 | 2021-04-07 |
-| 22 | 殺手寓言-第07卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
-| 23 | 殺手寓言-第06卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
-| 24 | 殺手寓言-第05卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
-| 25 | 殺手寓言-第04卷 | 南勝久 | 2021-4-4 | 2021-04-04 |
-| 26 | 殺手寓言-第03卷 | 南勝久 | 2021-3-31 | 2021-04-03 |
-| 27 | 殺手寓言-第02卷 | 南勝久 | 2021-3-31 | 2021-04-02 |
-| 28 | 殺手寓言-第01卷 | 南勝久 | 2021-3-31 | 2021-04-01 |
-| 29 | 夏日重現-第12卷 | 田中靖規 | 2021-2-19 | 2021-03-06 |
-| 30 | 夏日重現-第11卷 | 田中靖規 | 2021-2-19 | 2021-03-05 |
-| 31 | 夏日重現-第10卷 | 田中靖規 | 2021-2-19 | 2021-03-04 |
-| 32 | 夏日重現-第09卷 | 田中靖規 | 2021-2-19 | 2021-03-04 |
-| 33 | 夏日重現-第08卷 | 田中靖規 | 2021-2-19 | 2021-02-28 |
-| 34 | 夏日重現-第07卷 | 田中靖規 | 2021-2-19 | 2021-02-28 |
-| 35 | 夏日重現-第06卷 | 田中靖規 | 2021-2-19 | 2021-02-26 |
-| 36 | 夏日重現-第05卷 | 田中靖規 | 2021-2-19 | 2021-02-23 |
-| 37 | 夏日重現-第04卷 | 田中靖規 | 2021-2-19 | 2021-02-21 |
-| 38 | 夏日重現-第03卷 | 田中靖規 | 2021-2-19 | 2021-02-21 |
-| 39 | 夏日重現-第02卷 | 田中靖規 | 2021-2-19 | 2021-02-19 |
-| 40 | 夏日重現-第01卷 | 田中靖規 | 2021-2-19 | 2021-02-19 |
-| 41 | 1984 | George Orwell... | 2015-7-1 | 2021-02-18 |
-| 42 | 炎拳-第01卷 | 105965398155@vol.moe... | 2020-11-8 | 2020-11-10 |
-| 43 | 1984 | George Orwell... | 2015-7-1 | 2020-04-25 |
-| 44 | 锦衣之下 | 蓝色狮 | 2020-1-7 | 2020-01-27 |
-
+
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..1a4cfd8
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+from setuptools import find_packages, setup
+
+setup(
+ name="kindle_download",
+ author="yihong0618",
+ author_email="zouzou0208@gmail.com",
+ url="https://github.com/yihong0618/kindle_download_helper",
+ license="GPL V3",
+ version="1.1.1",
+ description="Download all your kindle books and `DeDRM` script.",
+ long_description="Download all your kindle books and `DeDRM` script.",
+ packages=find_packages(),
+ include_package_data=True,
+ install_requires=[
+ "requests",
+ "browser-cookie3",
+ "faker",
+ "pywin32 ; sys_platform == 'win32'"
+ ],
+ entry_points={
+ "console_scripts": ["kindle_download = kindle_download_helper.cli:main"],
+ },
+)
diff --git a/ui_kindle.py b/ui_kindle.py
index a625741..d558270 100644
--- a/ui_kindle.py
+++ b/ui_kindle.py
@@ -300,6 +300,6 @@ class Ui_MainDialog(object):
self.label_6.setText(QCoreApplication.translate("MainDialog", u"\u9690\u79c1\u58f0\u660e\uff1a\u6211\u4eec\u4e0d\u4f1a\u6536\u96c6\u4efb\u4f55\u7528\u6237\u4fe1\u606f\uff0c\u8bf7\u653e\u5fc3\u4f7f\u7528", None))
self.label_3.setText(QCoreApplication.translate("MainDialog", u"Copyright 2022 \u00a9 [yihong0618](https://github.com/yihong0618) and [frostming](https://github.com/frostming)", None))
self.label_4.setText(QCoreApplication.translate("MainDialog", u"GitHub: ", None))
- self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: MIT", None))
+ self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: GPL V3", None))
# retranslateUi