feat: refactor and pypi

This commit is contained in:
yihong0618
2022-08-01 20:56:19 +08:00
parent d1a94f3e2a
commit 401c21ee35
19 changed files with 790 additions and 771 deletions

21
LICENSE
View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2022 yihong, frostming and contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,6 +1,6 @@
# Kindle_download_helper # Kindle_download_helper
Download all your kindle books script. Download all your kindle books and `DeDRM` script.
<img width="1661" alt="image" src="https://user-images.githubusercontent.com/15976103/172113700-7be0ae1f-1aae-4b50-8377-13047c63411b.png"> <img width="1661" alt="image" src="https://user-images.githubusercontent.com/15976103/172113700-7be0ae1f-1aae-4b50-8377-13047c63411b.png">
## 安装 Kindle_download_helper ## 安装 Kindle_download_helper
@@ -17,11 +17,15 @@ Download all your kindle books script.
- Mac 新手指南 by @chongiscool,见 [#76](https://github.com/yihong0618/Kindle_download_helper/issues/76) - Mac 新手指南 by @chongiscool,见 [#76](https://github.com/yihong0618/Kindle_download_helper/issues/76)
### Cli 安装使用 ### Cli 安装使用
1. python3 1. python3
2. requirements 2. requirements
or just pip
pip3 install kindle_download
```python ```python
python3 --version #查看 python 版本 python3 --version #查看 python 版本
``` ```
@@ -36,6 +40,7 @@ pip3 install -r requirements.txt
```python ```python
python3 kindle.py --h #查看使用参数 python3 kindle.py --h #查看使用参数
kindle_download --h # pip
usage: kindle.py [-h] [--cookie COOKIE | --cookie-file COOKIE_FILE] [--cn] [--jp] [--de] [--resume-from INDEX] usage: kindle.py [-h] [--cookie COOKIE | --cookie-file COOKIE_FILE] [--cn] [--jp] [--de] [--resume-from INDEX]
[--cut-length CUT_LENGTH] [-o OUTDIR] [-od OUTDEDRMDIR] [-s SESSION_FILE] [--pdoc] [--resolve_duplicate_names] [--cut-length CUT_LENGTH] [-o OUTDIR] [-od OUTDEDRMDIR] [-s SESSION_FILE] [--pdoc] [--resolve_duplicate_names]
@@ -66,7 +71,7 @@ options:
--resolve_duplicate_names --resolve_duplicate_names
Resolve duplicate names files to download Resolve duplicate names files to download
--readme If you want to generate kindle readme stats --readme If you want to generate kindle readme stats
--dedrm If you want to `dedrm` directly --dedrm If you want to `DeDRM` directly
--list just list books/pdoc, not to download --list just list books/pdoc, not to download
``` ```
@@ -77,6 +82,8 @@ options:
```python ```python
python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
or
kindle_download --dedrm --cn
``` ```
(推荐) 手动输入 cookie、csrfToken 进行下载 (推荐) 手动输入 cookie、csrfToken 进行下载
@@ -84,6 +91,9 @@ python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
```python ```python
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍 python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
or
kindle_download ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
kindle_download ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
``` ```
### 获取 cookie ### 获取 cookie
@@ -154,7 +164,7 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken}
- cookie 和 csrf token 会过期,重新刷新下 amazon 的页面就行 - cookie 和 csrf token 会过期,重新刷新下 amazon 的页面就行
- 程序会自动在命令执行的目录下创建 `DOWNLOADS` 目录,书会下载在 `DOWNLOADS` - 程序会自动在命令执行的目录下创建 `DOWNLOADS` 目录,书会下载在 `DOWNLOADS`
- 支持 mobi 类型的文件直接 dedrm `--dedrm` 生成的文件在 `DEDRMS` - 支持 mobi 类型的文件直接 DeDRM `--dedrm` 生成的文件在 `DEDRMS`
- 如果你用 [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) 解密 key 存在 key.txt 里 - 如果你用 [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) 解密 key 存在 key.txt 里
- 或者直接拖进 Calibre 里 please google it. - 或者直接拖进 Calibre 里 please google it.
- 如果过程中失败了可以使用 e.g. `--resume-from ${num}` - 如果过程中失败了可以使用 e.g. `--resume-from ${num}`
@@ -169,7 +179,8 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken}
- The cookie and csrf token will expire, just refresh the amazon page again. - The cookie and csrf token will expire, just refresh the amazon page again.
- The program will automatically create `DOWNLOADS` directory under the command execution directory, the book will be downloaded in `DOWNLOADS` directory. - The program will automatically create `DOWNLOADS` directory under the command execution directory, the book will be downloaded in `DOWNLOADS` directory.
- If you use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to decrypt the key, it will be stored in key.txt - Support DeDRM with `--dedrm`
- or use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to remove the DRM yourself; the key you need is stored in `key.txt`
- or just drag it into Calibre. Please google it. - or just drag it into Calibre. Please google it.
- If the process fails you can use e.g. `--resume-from ${num}` - If the process fails you can use e.g. `--resume-from ${num}`
- If the name is too long, you can add: `--cut-length 80` to truncate the file name - If the name is too long, you can add: `--cut-length 80` to truncate the file name

View File

@@ -1816,7 +1816,7 @@ qt_resource_struct = b"\
\x00\x00\x00\x0e\x00\x02\x00\x00\x00\x01\x00\x00\x00\x03\ \x00\x00\x00\x0e\x00\x02\x00\x00\x00\x01\x00\x00\x00\x03\
\x00\x00\x00\x00\x00\x00\x00\x00\ \x00\x00\x00\x00\x00\x00\x00\x00\
\x00\x00\x00$\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\ \x00\x00\x00$\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
\x00\x00\x01\x82\x0f!\xe2\xc8\ \x00\x00\x01\x82X\xe5\xc8\x87\
" "
def qInitResources(): def qInitResources():

690
kindle.py
View File

@@ -1,691 +1,5 @@
""" from kindle_download_helper import main
Note some download code from: https://github.com/sghctoma/bOOkp
Great Thanks
"""
import argparse
import atexit
import html
import json
import logging
import os
import pickle
import re
import time
import urllib
from http.cookies import SimpleCookie
import requests
import urllib3
from faker import Faker
from requests.adapters import HTTPAdapter
from dedrm import MobiBook, get_pid_list
try:
import browser_cookie3
except ModuleNotFoundError:
print("not found browser_cookie3 here, you should use --cookie command")
# Shared "kindle" logger. Records at ERROR level and above are also written
# to .error_books.log (a path relative to the current working directory).
logger = logging.getLogger("kindle")
fh = logging.FileHandler(".error_books.log")
fh.setLevel(logging.ERROR)
logger.addHandler(fh)
# Book downloads in this module are requested with verify=False, so the
# matching urllib3 InsecureRequestWarning is silenced here.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Default directory for downloaded books.
DEFAULT_OUT_DIR = "DOWNLOADS"
# Default directory for the DRM-removed copies.
DEFAULT_OUT_DEDRM_DIR = "DEDRMS"
# Default path of the pickled HTTP session (see Kindle.dump_session).
DEFAULT_SESSION_FILE = ".kindle_session"
# Headers applied to a newly created session; the User-Agent value is
# produced by Faker once, when this module is imported.
KINDLE_HEADER = {
"User-Agent": Faker().user_agent(),
}
# Maps the file-type code used by this tool to the `contentType` value sent
# in the OwnershipData request payload.
CONTENT_TYPES = {
"EBOK": "Ebook",
"PDOC": "KindlePDoc",
}
# Per-marketplace endpoints, keyed by the domain code chosen on the command
# line. Every entry provides the same five keys:
#   bookall  - "all books" page (opened in the browser, scraped for the CSRF token)
#   download - content download URL template (type, key, fsn, device_type, customerId)
#   payload  - AJAX endpoint used for device and library queries
#   insights - reading-insights JSON endpoint
#   book_url - product page template, formatted with `book_id`
KINDLE_URLS = {
    "cn": {
        "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN",
        "payload": "https://www.amazon.cn/hz/mycd/ajax",
        "insights": "https://www.amazon.cn/kindle/reading/insights/data",
        "book_url": "https://www.amazon.cn/dp/{book_id}",
    },
    "jp": {
        # Use the same amazon.co.jp host as the other jp endpoints below
        # (this entry previously pointed at amazon.jp).
        "bookall": "https://www.amazon.co.jp/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
        "payload": "https://www.amazon.co.jp/hz/mycd/ajax",
        "insights": "https://www.amazon.co.jp/kindle/reading/insights/data",
        "book_url": "https://www.amazon.co.jp/dp/{book_id}",
    },
    "de": {
        "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
        "payload": "https://www.amazon.de/hz/mycd/ajax",
        "insights": "https://www.amazon.de/kindle/reading/insights/data",
        "book_url": "https://www.amazon.de/dp/{book_id}",
    },
    "com": {
        "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
        "payload": "https://www.amazon.com/hz/mycd/ajax",
        "insights": "https://www.amazon.com/kindle/reading/insights/data",
        "book_url": "https://www.amazon.com/dp/{book_id}",
    },
}
# for kindle stats
# Regex template for one named README region; `{name}` is filled in by
# replace_readme_comments. Group 1 / group 3 are the start / end markers.
GITHUB_README_COMMENTS = (
    "(<!--START_SECTION:{name}-->\n)(.*)(<!--END_SECTION:{name}-->\n)"
)
MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n"
MY_KINDLE_STATS_INFO = (
    "- I bought {books_len} books\n"
    "- I pushed {pdocs_len} docs\n"
    "- My first book is {first_book_title}, bought on {first_book_bought_date}\n"
    "- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n"
)
KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"


def replace_readme_comments(file_name, comment_str, comments_name):
    """Replace the body of a marked section inside a README-style file.

    The section is delimited by ``<!--START_SECTION:name-->`` and
    ``<!--END_SECTION:name-->`` lines; both markers are kept and everything
    between them is replaced by ``comment_str`` followed by a newline.

    Args:
        file_name: path of the file to rewrite in place (read/written as UTF-8).
        comment_str: new section content, inserted verbatim.
        comments_name: section name used in the markers.
    """
    pattern = re.compile(
        # Escape the name so regex metacharacters in it are matched literally.
        GITHUB_README_COMMENTS.format(name=re.escape(comments_name)),
        flags=re.DOTALL,
    )

    def _rebuild(match):
        # A callable replacement keeps backslashes / "\1"-like sequences in
        # comment_str (e.g. in book titles) from being read as regex escapes.
        return match.group(1) + comment_str + "\n" + match.group(3)

    with open(file_name, "r+", encoding="UTF-8") as f:
        text = pattern.sub(_rebuild, f.read())
        f.seek(0)
        f.write(text)
        f.truncate()
class Kindle:
def __init__(
    self,
    csrf_token=None,
    domain="cn",
    out_dir=DEFAULT_OUT_DIR,
    out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
    cut_length=100,
    session_file=DEFAULT_SESSION_FILE,
):
    """Set up a client for one Amazon marketplace.

    Args:
        csrf_token: CSRF token to use; when falsy it is fetched lazily.
        domain: key into ``KINDLE_URLS`` selecting the marketplace.
        out_dir: directory for downloaded files.
        out_dedrm_dir: directory for DRM-removed files.
        cut_length: maximum length of generated file names.
        session_file: path of the pickled session to restore/dump.
    """
    # Values taken from the constructor arguments.
    self._csrf_token = csrf_token
    self.urls = KINDLE_URLS[domain]
    self.out_dir, self.out_dedrm_dir = out_dir, out_dedrm_dir
    self.cut_length = cut_length
    self.session_file = session_file

    # Switches that callers flip after construction.
    self.dedrm = False
    self.is_browser_cookie = False
    self.to_resolve_duplicate_names = False

    # Bookkeeping filled in while listing / downloading.
    self.total_to_download = 0
    self.not_done = False
    self.books_info_dict = {}
    self.file_type_list = ["EBOOK", "PDOC"]

    # make_session reads self.session_file, so it runs after that is set.
    self.session = self.make_session()
    # Persist the session when the interpreter exits.
    atexit.register(self.dump_session)
def set_cookie(self, cookiejar):
    """Replace all session cookies with the ones in ``cookiejar``.

    Raises:
        Exception: if ``cookiejar`` is empty or otherwise falsy.
    """
    if cookiejar:
        session_cookies = self.session.cookies
        session_cookies.clear()
        session_cookies.update(cookiejar)
        return
    raise Exception("Please make sure your amazon cookie is right")
def set_cookie_from_string(self, cookie_string):
    """Parse ``cookie_string`` and install the resulting cookies on the session."""
    self.set_cookie(self._parse_kindle_cookie(cookie_string))
def dump_session(self):
    """Pickle the current session object into ``self.session_file``."""
    with open(self.session_file, "wb") as session_fp:
        pickle.dump(self.session, session_fp)
@property
def csrf_token(self):
    """CSRF token for AJAX calls; fetched and cached on first use when unset."""
    token = self._csrf_token
    if not token:
        token = self._csrf_token = self._get_csrf_token()
    return token


@csrf_token.setter
def csrf_token(self, csrf_token):
    # Store an explicitly supplied token.
    self._csrf_token = csrf_token
def ensure_session_cookie(self):
    """Load Amazon cookies from the local browsers if the session has none.

    This is best effort: any failure (including ``browser_cookie3`` not
    being installed) is reported to the user and otherwise ignored, so the
    caller can still fall back to ``--cookie``.
    """
    if self.session.cookies:
        return
    logger.debug("No cookie found, trying to load from browsers")
    try:
        self.set_cookie(browser_cookie3.load(domain_name="amazon"))
    except Exception as e:
        # `except Exception` (not a bare except) so Ctrl-C / SystemExit
        # still propagate; keep the real cause available in debug logs.
        logger.debug("Loading cookies from the browser failed: %r", e)
        print("not found browser_cookie3 here, you should use --cookie command")
@staticmethod
def _parse_kindle_cookie(kindle_cookie):
    """Convert a cookie string into a requests cookie jar.

    Args:
        kindle_cookie: cookie text in ``name=value; name2=value2`` form.

    Returns:
        A cookie jar holding every parsed cookie, or ``None`` when nothing
        could be parsed from the string.
    """
    cookie = SimpleCookie()
    cookie.load(kindle_cookie)
    cookies_dict = {key: morsel.value for key, morsel in cookie.items()}
    if not cookies_dict:
        return None
    # Build the jar once from the complete dict instead of once per cookie.
    return requests.utils.cookiejar_from_dict(
        cookies_dict, cookiejar=None, overwrite=True
    )
def _get_csrf_token(self):
"""
TODO: I do not know why I have to get csrf token in the page not in this way
maybe figure out why in the future
"""
r = self.session.get(self.urls["bookall"])
match = re.search(r'var csrfToken = "(.*)";', r.text)
if not match:
self.revoke_cookie_token(open_page=self.is_browser_cookie)
raise Exception(
"Can't get the csrf token, "
f"please refresh the page at {self.urls['bookall']} and retry"
)
return match.group(1)
def refresh_browser_cookie(self):
    """Open the "all books" page in a web browser, ignoring any failure."""
    import webbrowser as browser

    try:
        browser.open(self.urls["bookall"])
    except Exception:
        # Opening the page is only a convenience for the user.
        pass
def revoke_cookie_token(self, open_page=False):
    """Forget the cached CSRF token and session cookies.

    Args:
        open_page: when true, also open the "all books" page in the browser
            so the user can refresh the login.
    """
    # help user open it directly.
    logger.info(
        "Opening the url to get cookie...You can wait for the page to finish loading and retry"
    )
    # Drop the cookies so they can be reloaded from the browsers next time,
    # and reset the token that belonged to them.
    self.session.cookies.clear()
    self._csrf_token = None
    if open_page:
        self.refresh_browser_cookie()
def ensure_cookie_token(self):
"""
Make sure a CSRF token is cached before Amazon's AJAX API is used.

When no token is cached, the browser page refresh, the browser cookie
loading and the token fetch below are used to obtain one; afterwards the
session's ``session-id`` cookie is written to the debug log.

NOTE(review): indentation is not preserved in this view, so exactly which
of the calls below are guarded by the inner ``if not self.session.cookies``
cannot be confirmed here -- check against the repository.
"""
if not self._csrf_token:
if not self.session.cookies:
self.refresh_browser_cookie()
self.ensure_session_cookie()
self._csrf_token = self._get_csrf_token()
logger.debug(
f"session-id: { self.session.cookies.get_dict().get('session-id') }"
)
def make_session(self):
"""
Return the HTTP session used for all requests.

A previously dumped session is unpickled from ``self.session_file`` when
that file exists; otherwise a new ``requests.Session`` is created. The
session gets the ``KINDLE_HEADER`` headers and an HTTPS adapter with a
retry policy (5 retries, backoff factor 0.5).

NOTE(review): indentation is not preserved in this view, so it is unclear
whether the header update and the adapter mount apply only to a newly
created session or to a restored one as well -- confirm before relying on it.
"""
if os.path.exists(self.session_file):
with open(self.session_file, "rb") as f:
# SECURITY: unpickling can execute arbitrary code; only load session files
# that this program wrote itself.
session = pickle.load(f)
else:
session = requests.Session()
session.headers.update(KINDLE_HEADER)
session.mount(
# will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for
# (413, 429, 503) statuses
"https://",
HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)),
)
logger.debug(f"user-agent: { session.headers.get('User-Agent') }")
return session
def get_devices(self):
    """
    Return the account's devices that carry a ``deviceSerialNumber``.

    This method must be called before each download, so we ensure
    the session cookies before it is called

    Raises:
        Exception: when the API answers with an error (cookie and token are
            revoked first) or when the account has no devices.
    """
    self.ensure_cookie_token()
    request_data = {
        "data": json.dumps({"param": {"GetDevices": {}}}),
        "csrfToken": self.csrf_token,
    }
    r = self.session.post(self.urls["payload"], data=request_data)
    r.raise_for_status()

    answer = r.json()
    if answer.get("error"):
        self.revoke_cookie_token(open_page=True)
        raise Exception(
            f"Error: {answer.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie"
        )
    devices = answer["GetDevices"]["devices"]

    # sleep get device first time.
    logger.info("Amazon open their bot check will sleep 3s")
    time.sleep(3)

    if not devices:
        raise Exception("No devices are bound to this account")
    return [entry for entry in devices if "deviceSerialNumber" in entry]
def get_all_books(self, start_index=0, filetype="EBOK"):
"""
Page through the account library and return the raw item dicts.

Args:
    start_index: value used as the first ``startIndex`` of the query.
    filetype: key of ``CONTENT_TYPES`` ("EBOK" or "PDOC").

Returns:
    The list of items collected before the loop stopped.

Side effects: stores items whose ``readStatus`` is "READ" in
``self.books_info_dict`` (keyed by asin), sets ``self.total_to_download``
from the first response while it is still unset, and sets
``self.not_done`` in the rate-limit branch below.

TODO: refactor this function

NOTE(review): indentation is not preserved in this view, so the nesting of
the retry steps and of the per-item checks is not certain; the comments
below describe each step without asserting its exact nesting.
"""
# some info
if filetype == "PDOC":
logger.info(
"It will take some time to get all PDOC books list, please wait"
)
startIndex = start_index
batchSize = 100
# Query newest-first by date, one batch at a time, active items only.
payload = {
"param": {
"OwnershipData": {
"sortOrder": "DESCENDING",
"sortIndex": "DATE",
"startIndex": startIndex,
"batchSize": batchSize,
"contentType": CONTENT_TYPES[filetype],
"itemStatus": ["Active"],
}
}
}
if filetype == "EBOK":
# Ebooks: restrict the query to purchased items.
payload["param"]["OwnershipData"].update(
{
"originType": ["Purchase"],
}
)
else:
# Other file types (personal documents) are requested 18 at a time.
batchSize = 18
payload["param"]["OwnershipData"].update(
{
"batchSize": batchSize,
"isExtendedMYK": False,
}
)
books = []
### added by yihong0618 2022.06.27
### this ugly code is for amazon open their bot check
### if the bot check close
### will delete the try and try code
# Counts bot-check retries; it lengthens the sleep before the next retry.
break_times = 0
while True:
# anyway sleep 0.5
time.sleep(0.5)
r = self.session.post(
self.urls["payload"],
data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
)
# try three times for bot check
if r.status_code == 503:
# sleep and try again
sleep_seconds = 5 + 2 * break_times
time.sleep(sleep_seconds)
logger.info(
f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}"
)
if break_times < 7:
break_times += 1
r = self.session.post(
self.urls["payload"],
data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
)
if not r.ok:
if r.status_code == 503:
# NOTE(review): sleep_seconds is only assigned in the first 503 branch above;
# presumably this retry is nested inside it -- confirm.
time.sleep(sleep_seconds)
logger.info(
f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}"
)
logger.info(f"Next time fail will break the loop")
r = self.session.post(
self.urls["payload"],
data={
"data": json.dumps(payload),
"csrfToken": self.csrf_token,
},
)
break_times += 1
if not r.ok:
# amazon limit this api
if startIndex == 0:
logger.error(
"Amazon api limit when this download done.\n Please run it again`"
)
else:
# Remember that the listing is incomplete so the caller can suggest
# --resume-from.
self.not_done = True
logger.error(
"Amazon api limit when this download done.\n You can add command `--resume-from %s`",
startIndex,
)
break
result = r.json()
if not result.get("success", True):
logger.error("get all books error: %s", result.get("error"))
break
items = result["OwnershipData"]["items"]
for item in items:
if filetype == "PDOC":
# Personal-document titles/authors are HTML-escaped; the "author" key is
# moved to "authors".
item["title"] = html.unescape(item["title"])
item["authors"] = html.unescape(item.pop("author", ""))
if item.get("readStatus", "") == "READ":
# Index finished items by asin for the readme stats.
self.books_info_dict[item["asin"]] = item
books.extend(items)
if not self.total_to_download:
self.total_to_download = result["OwnershipData"]["numberOfItems"]
if result["OwnershipData"]["hasMoreItems"]:
# Advance to the next page.
startIndex += batchSize
payload["param"]["OwnershipData"]["startIndex"] = startIndex
else:
break
return books
def _get_reading_stats(self):
insights_url = self.urls["insights"]
r = self.session.get(insights_url)
if r.ok:
return r.json()
logger.error(f"Something is wrong get the stats data url: {insights_url}")
raise Exception(f"Something is wrong get the stats data url: {insights_url}")
def _make_one_book_stats_info(self, book_info):
book_url = self.urls["book_url"]
asin = book_info["asin"]
book = self.books_info_dict.get(asin)
book_title = book.get("title", "")
# filter the brackets in the book title
book_title = re.sub(
r"(\[^)]*\)|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title
)
book_title = book_title.replace(" ", "")
if book.get("category", "") == "KindleEBook":
book_url = book_url.format(book_id=asin)
book_title = f"[{book_title}]({book_url})"
book_authors = book.get("authors")
if len(book_authors) > 10:
book_authors = ",".join(book_authors.split(",")[:2]) + "..."
# only keep date
read = book_info.get("date_read")[:10]
acquired = (
book.get("acquiredDate", "")
.replace("", "-")
.replace("", "-")
.replace("", "")
)
return book_title, book_authors, acquired, read
def make_kindle_stats_readme(self):
    """Generate the reading-stats section of ``my_kindle_stats.md``.

    Collects the reading insights plus the full ebook and personal-document
    lists, renders a summary and a table of finished books, and writes the
    result into the ``my_kindle`` section of the stats file.
    """
    reading_stats = self._get_reading_stats()
    # The insights payload may lack either level; treat that as "nothing read".
    read_list = (reading_stats.get("goal_info") or {}).get("titles_read") or []
    ebooks = self.get_all_books(filetype="EBOK")
    pdocs = self.get_all_books(filetype="PDOC")
    # Lists are newest-first, so the last entry is the oldest one. Accounts
    # without ebooks or without pushed documents get a placeholder instead
    # of an IndexError.
    placeholder = {"title": "N/A", "acquiredDate": "N/A"}
    first_ebook = ebooks[-1] if ebooks else placeholder
    first_pdoc = pdocs[-1] if pdocs else placeholder
    print(len(self.books_info_dict.keys()), first_ebook, first_pdoc)
    print(read_list)
    parts = [
        MY_KINDLE_STATS_INFO_HEAD,
        MY_KINDLE_STATS_INFO.format(
            books_len=len(ebooks),
            pdocs_len=len(pdocs),
            first_book_title=first_ebook["title"],
            first_book_bought_date=first_ebook["acquiredDate"],
            first_doc_title=first_pdoc["title"],
            first_doc_push_date=first_pdoc["acquiredDate"],
        ),
        KINDLE_TABLE_HEAD,
    ]
    # One table row per finished book, numbered from 1.
    for index, book_info in enumerate(read_list, start=1):
        book_title, book_authors, acquired, read = self._make_one_book_stats_info(
            book_info
        )
        parts.append(
            KINDLE_STAT_TEMPLATE.format(
                id=str(index),
                title=book_title,
                authors=book_authors,
                acquired=acquired,
                read=read,
            )
        )
    replace_readme_comments("my_kindle_stats.md", "".join(parts), "my_kindle")
def download_one_book(self, book, device, index, filetype="EBOK"):
    """Download one library item and optionally remove its DRM.

    Args:
        book: library item dict; "title" and "asin" are required.
        device: device dict providing "deviceSerialNumber", "deviceType"
            and "customerId" for the download URL.
        index: zero-based position, used only for progress logging.
        filetype: content type code placed in the download URL.

    Failures are logged and swallowed so that one bad book does not stop a
    batch download.
    """
    title = book["title"]
    asin = book["asin"]
    r = None
    try:
        download_url = self.urls["download"].format(
            filetype,
            asin,
            device["deviceSerialNumber"],
            device["deviceType"],
            device["customerId"],
        )
        r = self.session.get(download_url, verify=False, stream=True)
        r.raise_for_status()
        # Only the extension of the server-provided file name is kept; the
        # base name comes from the book title.
        name = re.findall(
            r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"]
        )[0]
        name = urllib.parse.unquote(name)
        _, extname = os.path.splitext(name)
        name = title + extname
        # Replace characters that are not allowed in file names.
        name = re.sub(r'[\\/:*?"<>|]', "_", name)
        ##### if you have many duplicate name books #####
        if self.to_resolve_duplicate_names:
            name = f"{asin}_{name}"
        if len(name) > self.cut_length:
            # Keep the last 5 characters so the extension survives the cut.
            name = name[: self.cut_length - 5] + name[-5:]
        # The size is informational only; do not fail when it is missing.
        total_size = r.headers.get("Content-Length", "unknown")
        out = os.path.join(self.out_dir, name)
        out_dedrm = os.path.join(self.out_dedrm_dir, name)
        logger.info(
            f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes"
        )
        with open(out, "wb") as f:
            for chunk in r.iter_content(chunk_size=512):
                f.write(chunk)
        logger.info(f"{name} downloaded")
        # for dedrm
        if self.dedrm:
            try:
                # Prefer the serial recorded by download_books; fall back to
                # the device used for this download when called directly.
                serial = getattr(
                    self, "device_serial_number", device["deviceSerialNumber"]
                )
                mb = MobiBook(out)
                md1, md2 = mb.get_pid_meta_info()
                totalpids = get_pid_list(md1, md2, [serial], [])
                totalpids = list(set(totalpids))
                mb.make_drm_file(totalpids, out_dedrm)
            except Exception as e:
                logger.error("Dedrm failed for %s: %s", name, e)
    except Exception as e:
        logger.error(str(e))
        logger.error(f"Title: {title}, Asin: {asin} download failed")
    finally:
        # stream=True keeps the connection checked out until the response
        # is closed; release it on success and on failure.
        if r is not None:
            r.close()
def download_books(self, start_index=0, filetype="EBOK"):
"""
Download every listed item of ``filetype`` using the first device.

Args:
    start_index: zero-based index to start listing from (used to resume).
    filetype: content type code passed to listing and download.

The device serial number is remembered on ``self.device_serial_number``
and written to ``key.txt`` inside ``self.out_dir``.

NOTE(review): indentation is not preserved in this view, so whether the
``key.txt`` write happens on every run or only in one branch of the final
``if``/``else`` cannot be confirmed here.
"""
# use default device
device = self.get_devices()[0]
self.device_serial_number = device["deviceSerialNumber"]
logger.info(
f"Using default device serial Number: {device['deviceSerialNumber']}"
)
books = self.get_all_books(filetype=filetype, start_index=start_index)
if start_index > 0:
print(f"resuming the download {start_index + 1}/{self.total_to_download}")
# `index` tracks the absolute position for progress and resume messages.
index = start_index
for book in books:
self.download_one_book(book, device, index, filetype)
index += 1
# get_all_books sets not_done when the listing stopped early.
if self.not_done:
logger.error(
f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time"
)
else:
if not self.dedrm:
logger.info(
"\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools "
"(https://github.com/apprenticeharper/DeDRM_tools)\n"
"with the following serial number to remove DRM: "
+ device["deviceSerialNumber"]
)
else:
logger.info(
"All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`"
)
with open(os.path.join(self.out_dir, "key.txt"), "w") as f:
f.write(f"Key is: {device['deviceSerialNumber']}")
if __name__ == "__main__": if __name__ == "__main__":
main()
# Command-line flow: parse the options, build a Kindle client, then list the
# library, generate the readme stats, or download the books.
logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))
logger.addHandler(logging.StreamHandler())

parser = argparse.ArgumentParser()
parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")

cookie_group = parser.add_mutually_exclusive_group()
cookie_group.add_argument(
    "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
)
cookie_group.add_argument(
    "--cookie-file", dest="cookie_file", default="", help="load cookie local file"
)
# All region flags store into `domain`; "com" is used when none is given.
for region in ("cn", "jp", "de"):
    parser.add_argument(
        f"--{region}",
        dest="domain",
        action="store_const",
        const=region,
        default="com",
        help=f"if your account is an amazon.{region} account",
    )
parser.add_argument(
    "--resume-from",
    dest="index",
    type=int,
    default=1,
    help="resume from the index if download failed",
)
parser.add_argument(
    "--cut-length",
    dest="cut_length",
    type=int,
    default=100,
    help="truncate the file name",
)
parser.add_argument(
    "-o", "--outdir", default=DEFAULT_OUT_DIR, help="download output dir"
)
parser.add_argument(
    "-od",
    "--outdedrmdir",
    default=DEFAULT_OUT_DEDRM_DIR,
    help="download output dedrm dir",
)
parser.add_argument(
    "-s",
    "--session-file",
    default=DEFAULT_SESSION_FILE,
    help="The reusable session dump file",
)
parser.add_argument(
    "--pdoc",
    dest="filetype",
    action="store_const",
    const="PDOC",
    default="EBOK",
    help="to download personal documents or ebook",
)
parser.add_argument(
    "--resolve_duplicate_names",
    dest="resolve_duplicate_names",
    action="store_true",
    help="Resolve duplicate names files to download",
)
parser.add_argument(
    "--readme",
    dest="readme",
    action="store_true",
    help="If you want to generate kindle readme stats",
)
parser.add_argument(
    "--dedrm",
    dest="dedrm",
    action="store_true",
    help="If you want to `dedrm` directly",
)
parser.add_argument(
    "--list",
    dest="list_only",
    action="store_true",
    help="just list books/pdoc, not to download",
)

options = parser.parse_args()

# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(options.outdir, exist_ok=True)
# for dedrm
os.makedirs(options.outdedrmdir, exist_ok=True)

kindle = Kindle(
    options.csrf_token,
    options.domain,
    options.outdir,
    options.outdedrmdir,
    options.cut_length,
    session_file=options.session_file,
)
# other args
kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
kindle.dedrm = options.dedrm

# Cookie source priority: file, then command line, then the local browsers.
if options.cookie_file:
    with open(options.cookie_file, "r") as f:
        kindle.set_cookie_from_string(f.read())
elif options.cookie:
    kindle.set_cookie_from_string(options.cookie)
else:
    kindle.is_browser_cookie = True

if options.list_only:
    kindle.get_devices()
    print(
        json.dumps(
            kindle.get_all_books(filetype=options.filetype),
            indent=4,
            ensure_ascii=False,
        )
    )
    # `exit()` is injected by the `site` module and may be absent (python -S,
    # frozen builds); raising SystemExit ends the script the same way.
    raise SystemExit

if options.readme:
    # generate readme stats
    kindle.make_kindle_stats_readme()
else:
    # --resume-from is 1-based on the command line.
    kindle.download_books(start_index=options.index - 1, filetype=options.filetype)

View File

@@ -340,7 +340,7 @@ hr { height: 1px; border-width: 0; }
<item> <item>
<widget class="QLabel" name="label_5"> <widget class="QLabel" name="label_5">
<property name="text"> <property name="text">
<string>License: MIT</string> <string>License: GPL V3</string>
</property> </property>
</widget> </widget>
</item> </item>

View File

@@ -7,7 +7,7 @@ from typing import NamedTuple
from PySide6 import QtCore, QtGui, QtWidgets from PySide6 import QtCore, QtGui, QtWidgets
import kindle from kindle_download_helper import kindle as kindle
from ui_kindle import Ui_MainDialog from ui_kindle import Ui_MainDialog
logger = logging.getLogger("kindle") logger = logging.getLogger("kindle")

View File

@@ -0,0 +1,2 @@
from kindle_download_helper.cli import main
from kindle_download_helper import kindle

View File

@@ -0,0 +1 @@
from cli import main

View File

@@ -0,0 +1,170 @@
from kindle_download_helper.kindle import Kindle
import argparse
import os
import urllib3
import logging
import json
from kindle_download_helper.config import (
DEFAULT_OUT_DIR,
DEFAULT_SESSION_FILE,
DEFAULT_OUT_DEDRM_DIR,
)
# Shared "kindle" logger. kindle_download_helper.kindle (imported above)
# already attaches a FileHandler for .error_books.log to this same logger, so
# reuse an existing file handler instead of adding a second one -- otherwise
# every error would be written to the log file twice.
logger = logging.getLogger("kindle")
fh = next(
    (h for h in logger.handlers if isinstance(h, logging.FileHandler)), None
)
if fh is None:
    fh = logging.FileHandler(".error_books.log")
    fh.setLevel(logging.ERROR)
    logger.addHandler(fh)
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def _build_parser():
    """Build the argument parser for the command-line interface."""
    parser = argparse.ArgumentParser()
    parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")

    cookie_group = parser.add_mutually_exclusive_group()
    cookie_group.add_argument(
        "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
    )
    cookie_group.add_argument(
        "--cookie-file", dest="cookie_file", default="", help="load cookie local file"
    )
    # All region flags store into `domain`; "com" is used when none is given.
    for region in ("cn", "jp", "de"):
        parser.add_argument(
            f"--{region}",
            dest="domain",
            action="store_const",
            const=region,
            default="com",
            help=f"if your account is an amazon.{region} account",
        )
    parser.add_argument(
        "--resume-from",
        dest="index",
        type=int,
        default=1,
        help="resume from the index if download failed",
    )
    parser.add_argument(
        "--cut-length",
        dest="cut_length",
        type=int,
        default=100,
        help="truncate the file name",
    )
    parser.add_argument(
        "-o", "--outdir", default=DEFAULT_OUT_DIR, help="download output dir"
    )
    parser.add_argument(
        "-od",
        "--outdedrmdir",
        default=DEFAULT_OUT_DEDRM_DIR,
        help="download output dedrm dir",
    )
    parser.add_argument(
        "-s",
        "--session-file",
        default=DEFAULT_SESSION_FILE,
        help="The reusable session dump file",
    )
    parser.add_argument(
        "--pdoc",
        dest="filetype",
        action="store_const",
        const="PDOC",
        default="EBOK",
        help="to download personal documents or ebook",
    )
    parser.add_argument(
        "--resolve_duplicate_names",
        dest="resolve_duplicate_names",
        action="store_true",
        help="Resolve duplicate names files to download",
    )
    parser.add_argument(
        "--readme",
        dest="readme",
        action="store_true",
        help="If you want to generate kindle readme stats",
    )
    parser.add_argument(
        "--dedrm",
        dest="dedrm",
        action="store_true",
        help="If you want to `dedrm` directly",
    )
    parser.add_argument(
        "--list",
        dest="list_only",
        action="store_true",
        help="just list books/pdoc, not to download",
    )
    return parser


def main():
    """Run the command-line interface.

    Parses ``sys.argv``, creates the output directories, configures a
    ``Kindle`` client and then either lists the library (``--list``),
    generates the readme stats (``--readme``) or downloads the books.
    """
    logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))
    logger.addHandler(logging.StreamHandler())

    options = _build_parser().parse_args()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(options.outdir, exist_ok=True)
    # for dedrm
    os.makedirs(options.outdedrmdir, exist_ok=True)

    kindle = Kindle(
        options.csrf_token,
        options.domain,
        options.outdir,
        options.outdedrmdir,
        options.cut_length,
        session_file=options.session_file,
    )
    # other args
    kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
    kindle.dedrm = options.dedrm

    # Cookie source priority: file, then command line, then the local browsers.
    if options.cookie_file:
        with open(options.cookie_file, "r") as f:
            kindle.set_cookie_from_string(f.read())
    elif options.cookie:
        kindle.set_cookie_from_string(options.cookie)
    else:
        kindle.is_browser_cookie = True

    if options.list_only:
        kindle.get_devices()
        print(
            json.dumps(
                kindle.get_all_books(filetype=options.filetype),
                indent=4,
                ensure_ascii=False,
            )
        )
        # Plain return instead of the `site`-provided exit(), which is not
        # guaranteed to exist (python -S, frozen builds).
        return

    if options.readme:
        # generate readme stats
        kindle.make_kindle_stats_readme()
    else:
        # --resume-from is 1-based on the command line.
        kindle.download_books(start_index=options.index - 1, filetype=options.filetype)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,56 @@
from faker import Faker
# Default directory for downloaded books.
DEFAULT_OUT_DIR = "DOWNLOADS"
# Default directory for the DRM-removed copies.
DEFAULT_OUT_DEDRM_DIR = "DEDRMS"
# Default path of the reusable session dump file.
DEFAULT_SESSION_FILE = ".kindle_session"
# Request headers; the User-Agent value is produced by Faker once, when this
# module is imported.
KINDLE_HEADER = {
"User-Agent": Faker().user_agent(),
}
# Maps the file-type code used by this tool to the Amazon content type name.
CONTENT_TYPES = {
"EBOK": "Ebook",
"PDOC": "KindlePDoc",
}
# Per-marketplace endpoints, keyed by the domain code chosen on the command
# line. Every entry provides the same five keys:
#   bookall  - "all books" page of the content library
#   download - content download URL template (type, key, fsn, device_type, customerId)
#   payload  - AJAX endpoint of the content library
#   insights - reading-insights JSON endpoint
#   book_url - product page template, formatted with `book_id`
KINDLE_URLS = {
    "cn": {
        "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN",
        "payload": "https://www.amazon.cn/hz/mycd/ajax",
        "insights": "https://www.amazon.cn/kindle/reading/insights/data",
        "book_url": "https://www.amazon.cn/dp/{book_id}",
    },
    "jp": {
        # Use the same amazon.co.jp host as the other jp endpoints below
        # (this entry previously pointed at amazon.jp).
        "bookall": "https://www.amazon.co.jp/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
        "payload": "https://www.amazon.co.jp/hz/mycd/ajax",
        "insights": "https://www.amazon.co.jp/kindle/reading/insights/data",
        "book_url": "https://www.amazon.co.jp/dp/{book_id}",
    },
    "de": {
        "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
        "payload": "https://www.amazon.de/hz/mycd/ajax",
        "insights": "https://www.amazon.de/kindle/reading/insights/data",
        "book_url": "https://www.amazon.de/dp/{book_id}",
    },
    "com": {
        "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll",
        "download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
        "payload": "https://www.amazon.com/hz/mycd/ajax",
        "insights": "https://www.amazon.com/kindle/reading/insights/data",
        "book_url": "https://www.amazon.com/dp/{book_id}",
    },
}
# for kindle stats
# Regex template for one named README region; `{name}` must be filled in with
# str.format before use. Group 1 / group 3 are the start / end marker lines.
GITHUB_README_COMMENTS = (
    "(<!--START_SECTION:{name}-->\n)(.*)(<!--END_SECTION:{name}-->\n)"
)
MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n"
# Summary bullet list ("docs", not "docks", for pushed personal documents).
MY_KINDLE_STATS_INFO = (
    "- I bought {books_len} books\n"
    "- I pushed {pdocs_len} docs\n"
    "- My first book is {first_book_title}, bought on {first_book_bought_date}\n"
    "- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n"
)
KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"

View File

@@ -0,0 +1,499 @@
"""
Note some download code from: https://github.com/sghctoma/bOOkp
Great Thanks
"""
import atexit
import html
import json
import logging
import os
import pickle
import re
import time
import urllib
from http.cookies import SimpleCookie
import requests
import urllib3
from requests.adapters import HTTPAdapter
from kindle_download_helper.dedrm import MobiBook, get_pid_list
from kindle_download_helper.config import (
KINDLE_URLS,
DEFAULT_OUT_DIR,
DEFAULT_SESSION_FILE,
DEFAULT_OUT_DEDRM_DIR,
CONTENT_TYPES,
KINDLE_STAT_TEMPLATE,
)
from kindle_download_helper.config import (
MY_KINDLE_STATS_INFO_HEAD,
KINDLE_HEADER,
MY_KINDLE_STATS_INFO,
KINDLE_TABLE_HEAD,
)
from kindle_download_helper.utils import replace_readme_comments
# browser_cookie3 is an optional dependency: when it is missing, the user is
# told to supply the cookie with the --cookie option instead.
try:
import browser_cookie3
except ModuleNotFoundError:
print("not found browser_cookie3 here, you should use --cookie command")
# Logger shared by this module; a file handler additionally sends records of
# level ERROR and above to ".error_books.log" in the working directory.
logger = logging.getLogger("kindle")
fh = logging.FileHandler(".error_books.log")
fh.setLevel(logging.ERROR)
logger.addHandler(fh)
# Silence InsecureRequestWarning: download_one_book requests with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
class Kindle:
def __init__(
    self,
    csrf_token=None,
    domain="cn",
    out_dir=DEFAULT_OUT_DIR,
    out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
    cut_length=100,
    session_file=DEFAULT_SESSION_FILE,
):
    """Create a downloader bound to one Amazon domain.

    Args:
        csrf_token: CSRF token for the ajax API; when falsy it is fetched
            lazily through the ``csrf_token`` property.
        domain: key of ``KINDLE_URLS`` selecting the endpoint set.
        out_dir: directory downloaded files are written to.
        out_dedrm_dir: directory the DeDRM output files are written to.
        cut_length: maximum length of a generated file name.
        session_file: path the HTTP session is pickled to at interpreter
            exit and restored from by ``make_session``.

    Raises:
        KeyError: if ``domain`` is not a key of ``KINDLE_URLS``.
    """
    # endpoints and credentials
    self.urls = KINDLE_URLS[domain]
    self._csrf_token = csrf_token

    # output configuration
    self.total_to_download = 0
    self.out_dir = out_dir
    self.out_dedrm_dir = out_dedrm_dir
    self.dedrm = False
    self.cut_length = cut_length

    # set to True by get_all_books when the listing was cut short
    self.not_done = False

    # HTTP session (make_session reads self.session_file, so set it first)
    self.session_file = session_file
    self.session = self.make_session()

    # behaviour switches and caches
    self.is_browser_cookie = False
    self.to_resolve_duplicate_names = False
    self.books_info_dict = {}
    self.file_type_list = ["EBOOK", "PDOC"]

    # persist the session when the interpreter exits
    atexit.register(self.dump_session)
def set_cookie(self, cookiejar):
if not cookiejar:
raise Exception("Please make sure your amazon cookie is right")
self.session.cookies.clear()
self.session.cookies.update(cookiejar)
def set_cookie_from_string(self, cookie_string):
cj = self._parse_kindle_cookie(cookie_string)
self.set_cookie(cj)
def dump_session(self):
with open(self.session_file, "wb") as f:
pickle.dump(self.session, f)
@property
def csrf_token(self):
if not self._csrf_token:
self._csrf_token = self._get_csrf_token()
return self._csrf_token
@csrf_token.setter
def csrf_token(self, csrf_token):
self._csrf_token = csrf_token
def ensure_session_cookie(self):
if not self.session.cookies:
logger.debug("No cookie found, trying to load from browsers")
try:
self.set_cookie(browser_cookie3.load(domain_name="amazon"))
except:
print("not found browser_cookie3 here, you should use --cookie command")
@staticmethod
def _parse_kindle_cookie(kindle_cookie):
cookie = SimpleCookie()
cookie.load(kindle_cookie)
cookies_dict = {}
cookiejar = None
for key, morsel in cookie.items():
cookies_dict[key] = morsel.value
cookiejar = requests.utils.cookiejar_from_dict(
cookies_dict, cookiejar=None, overwrite=True
)
return cookiejar
def _get_csrf_token(self):
"""
TODO: I do not know why I have to get csrf token in the page not in this way
maybe figure out why in the future
"""
r = self.session.get(self.urls["bookall"])
match = re.search(r'var csrfToken = "(.*)";', r.text)
if not match:
self.revoke_cookie_token(open_page=self.is_browser_cookie)
raise Exception(
"Can't get the csrf token, "
f"please refresh the page at {self.urls['bookall']} and retry"
)
return match.group(1)
def refresh_browser_cookie(self):
import webbrowser
try:
webbrowser.open(self.urls["bookall"])
except Exception:
pass
def revoke_cookie_token(self, open_page=False):
    """Forget the cached CSRF token and every session cookie.

    Args:
        open_page: when true, also open the book-list page in a browser
            so the user can refresh the login there.
    """
    # help user open it directly.
    logger.info(
        "Opening the url to get cookie...You can wait for the page to finish loading and retry"
    )
    # reset the token
    self._csrf_token = None
    # empty the jar so the next run reloads the cookies from the browsers
    self.session.cookies.clear()
    if not open_page:
        return
    self.refresh_browser_cookie()
def ensure_cookie_token(self):
# Make sure a CSRF token is available before the ajax API is used.
# When no token is cached and the session holds no cookies, the book-list
# page is opened in a browser; ensure_session_cookie and _get_csrf_token
# are used to obtain the cookies and a fresh token.
if not self._csrf_token:
if not self.session.cookies:
self.refresh_browser_cookie()
self.ensure_session_cookie()
self._csrf_token = self._get_csrf_token()
# log the session-id cookie for debugging
logger.debug(
f"session-id: { self.session.cookies.get_dict().get('session-id') }"
)
def make_session(self):
# Return the HTTP session used for every request: a session previously
# pickled to self.session_file when that file exists, otherwise a new
# requests.Session.
if os.path.exists(self.session_file):
with open(self.session_file, "rb") as f:
# NOTE(review): pickle.load executes code embedded in the file. This is
# only safe while session_file is written by this tool (see dump_session);
# confirm it can never point at untrusted data. A truncated file also
# raises here instead of falling back to a new session.
session = pickle.load(f)
else:
session = requests.Session()
session.headers.update(KINDLE_HEADER)
session.mount(
# will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for
# (413, 429, 503) statuses
"https://",
HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)),
)
logger.debug(f"user-agent: { session.headers.get('User-Agent') }")
return session
def get_devices(self):
"""
This method must be called before each download, so we ensure
the session cookies before it is called
"""
self.ensure_cookie_token()
payload = {"param": {"GetDevices": {}}}
r = self.session.post(
self.urls["payload"],
data={
"data": json.dumps(payload),
"csrfToken": self.csrf_token,
},
)
r.raise_for_status()
devices = r.json()
if devices.get("error"):
self.revoke_cookie_token(open_page=True)
raise Exception(
f"Error: {devices.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie"
)
devices = r.json()["GetDevices"]["devices"]
# sleep get device first time.
logger.info("Amazon open their bot check will sleep 3s")
time.sleep(3)
if not devices:
raise Exception("No devices are bound to this account")
return [device for device in devices if "deviceSerialNumber" in device]
def get_all_books(self, start_index=0, filetype="EBOK"):
"""
TODO: refactor this function
"""
# Page through the account's content list on the "payload" endpoint and
# return the collected item dicts.
#   start_index: offset of the first item to request.
#   filetype: key of CONTENT_TYPES selecting the kind of content to list.
# Side effects: updates self.total_to_download, may set self.not_done, and
# stores items whose readStatus is "READ" in self.books_info_dict by asin.
# some info
if filetype == "PDOC":
logger.info(
"It will take some time to get all PDOC books list, please wait"
)
startIndex = start_index
batchSize = 100
# request body of the OwnershipData query; startIndex is advanced per page
payload = {
"param": {
"OwnershipData": {
"sortOrder": "DESCENDING",
"sortIndex": "DATE",
"startIndex": startIndex,
"batchSize": batchSize,
"contentType": CONTENT_TYPES[filetype],
"itemStatus": ["Active"],
}
}
}
if filetype == "EBOK":
payload["param"]["OwnershipData"].update(
{
"originType": ["Purchase"],
}
)
else:
# every other file type is requested in pages of 18 items
batchSize = 18
payload["param"]["OwnershipData"].update(
{
"batchSize": batchSize,
"isExtendedMYK": False,
}
)
books = []
### added by yihong0618 2022.06.27
### this ugly code is for amazon open their bot check
### if the bot check close
### will delete the try and try code
break_times = 0
while True:
# anyway sleep 0.5
time.sleep(0.5)
r = self.session.post(
self.urls["payload"],
data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
)
# try three times for bot check
if r.status_code == 503:
# sleep and try again
# the wait grows by 2 seconds for each bot check already seen
sleep_seconds = 5 + 2 * break_times
time.sleep(sleep_seconds)
logger.info(
f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}"
)
if break_times < 7:
break_times += 1
r = self.session.post(
self.urls["payload"],
data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
)
if not r.ok:
if r.status_code == 503:
time.sleep(sleep_seconds)
logger.info(
f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}"
)
logger.info(f"Next time fail will break the loop")
r = self.session.post(
self.urls["payload"],
data={
"data": json.dumps(payload),
"csrfToken": self.csrf_token,
},
)
break_times += 1
if not r.ok:
# amazon limit this api
if startIndex == 0:
logger.error(
"Amazon api limit when this download done.\n Please run it again`"
)
else:
# remember the listing is incomplete so the caller can suggest resuming
self.not_done = True
logger.error(
"Amazon api limit when this download done.\n You can add command `--resume-from %s`",
startIndex,
)
break
result = r.json()
# a missing "success" key is treated as success
if not result.get("success", True):
logger.error("get all books error: %s", result.get("error"))
break
items = result["OwnershipData"]["items"]
for item in items:
if filetype == "PDOC":
# PDOC entries are HTML-escaped and use "author" instead of "authors"
item["title"] = html.unescape(item["title"])
item["authors"] = html.unescape(item.pop("author", ""))
if item.get("readStatus", "") == "READ":
self.books_info_dict[item["asin"]] = item
books.extend(items)
self.total_to_download = result["OwnershipData"]["numberOfItems"]
if result["OwnershipData"]["hasMoreItems"]:
# advance to the next page
startIndex += batchSize
payload["param"]["OwnershipData"]["startIndex"] = startIndex
else:
break
return books
def _get_reading_stats(self):
insights_url = self.urls["insights"]
r = self.session.get(insights_url)
if r.ok:
return r.json()
logger.error(f"Something is wrong get the stats data url: {insights_url}")
raise Exception(f"Something is wrong get the stats data url: {insights_url}")
def _make_one_book_stats_info(self, book_info):
book_url = self.urls["book_url"]
asin = book_info["asin"]
book = self.books_info_dict.get(asin)
if not book:
return
book_title = book.get("title", "")
# filter the brackets in the book title
book_title = re.sub(
r"(\[^)]*\)|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title
)
book_title = book_title.replace(" ", "")
if book.get("category", "") == "KindleEBook":
book_url = book_url.format(book_id=asin)
book_title = f"[{book_title}]({book_url})"
book_authors = book.get("authors")
if len(book_authors) > 10:
book_authors = ",".join(book_authors.split(",")[:2]) + "..."
# only keep date
read = book_info.get("date_read")[:10]
acquired = (
book.get("acquiredDate", "")
.replace("", "-")
.replace("", "-")
.replace("", "")
)
return book_title, book_authors, acquired, read
def make_kindle_stats_readme(self):
    """Write the reading statistics into ``my_kindle_stats.md``.

    Lists all e-books and personal documents, fetches the reading stats,
    renders a summary plus one table row per read book that is known to
    ``self.books_info_dict``, and stores the result in the ``my_kindle``
    section of the file (the file is created with an empty section when
    it does not exist yet).
    """
    ebooks = self.get_all_books(filetype="EBOK")
    pdocs = self.get_all_books(filetype="PDOC")
    reading_stats = self._get_reading_stats()
    # a missing "titles_read" entry simply yields an empty table
    read_list = reading_stats.get("goal_info", {}).get("titles_read") or []

    # The lists are requested in descending date order, so the last item is
    # the earliest one. (The e-book was previously never picked up because
    # the check tested the still-unset result instead of the list.)
    first_ebook = ebooks[-1] if ebooks else None
    first_pdoc = pdocs[-1] if pdocs else None

    parts = [MY_KINDLE_STATS_INFO_HEAD]
    if pdocs or ebooks:
        parts.append(
            MY_KINDLE_STATS_INFO.format(
                books_len=len(ebooks) if ebooks else 0,
                pdocs_len=len(pdocs) if pdocs else 0,
                first_book_title=first_ebook["title"] if first_ebook else "",
                first_book_bought_date=first_ebook["acquiredDate"]
                if first_ebook
                else "",
                first_doc_title=first_pdoc["title"] if first_pdoc else "",
                first_doc_push_date=first_pdoc["acquiredDate"]
                if first_pdoc
                else "",
            )
        )
    parts.append(KINDLE_TABLE_HEAD)

    index = 1
    for book_info in read_list:
        # build each row once; unknown books give None and are skipped
        row = self._make_one_book_stats_info(book_info)
        if not row:
            continue
        book_title, book_authors, acquired, read = row
        parts.append(
            KINDLE_STAT_TEMPLATE.format(
                id=str(index),
                title=book_title,
                authors=book_authors,
                acquired=acquired,
                read=read,
            )
        )
        index += 1
    s = "".join(parts)

    if not os.path.exists("my_kindle_stats.md"):
        # same encoding as replace_readme_comments uses to rewrite the file
        with open("my_kindle_stats.md", "a", encoding="UTF-8") as f:
            f.write(
                """<!--START_SECTION:my_kindle-->
<!--END_SECTION:my_kindle-->
"""
            )
    replace_readme_comments("my_kindle_stats.md", s, "my_kindle")
def download_one_book(self, book, device, index, filetype="EBOK"):
    """Download one book and, when ``self.dedrm`` is set, remove its DRM.

    Every failure is logged (including to the error log) and swallowed so
    that one bad book does not abort a whole batch.

    Args:
        book: item dict providing ``title`` and ``asin``.
        device: device dict providing ``deviceSerialNumber``,
            ``deviceType`` and ``customerId``.
        index: zero-based position of the book, used in progress output.
        filetype: content type put into the download URL.
    """
    title = book["title"]
    asin = book["asin"]
    try:
        download_url = self.urls["download"].format(
            filetype,
            asin,
            device["deviceSerialNumber"],
            device["deviceType"],
            device["customerId"],
        )
        # The context manager closes the streamed response, releasing the
        # connection even when the download fails half-way.
        with self.session.get(download_url, verify=False, stream=True) as r:
            r.raise_for_status()
            name = re.findall(
                r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"]
            )[0]
            name = urllib.parse.unquote(name)
            # keep only the server's extension; the file is named by title
            _, extname = os.path.splitext(name)
            name = title + extname
            name = re.sub(r'[\\/:*?"<>|]', "_", name)
            ##### if you have many duplicate name books #####
            if self.to_resolve_duplicate_names:
                name = f"{asin}_{name}"
            if len(name) > self.cut_length:
                # shorten but keep the last 5 characters (the extension)
                name = name[: self.cut_length - 5] + name[-5:]
            # the size is informational only; do not fail when it is absent
            total_size = r.headers.get("Content-length", "unknown")
            out = os.path.join(self.out_dir, name)
            out_dedrm = os.path.join(self.out_dedrm_dir, name)
            logger.info(
                f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes"
            )
            with open(out, "wb") as f:
                for chunk in r.iter_content(chunk_size=512):
                    f.write(chunk)
        logger.info(f"{name} downloaded")
        # for dedrm
        if self.dedrm:
            # Fall back to the device passed in, so this method also works
            # when download_books has not set the serial number first.
            serial_number = (
                getattr(self, "device_serial_number", None)
                or device["deviceSerialNumber"]
            )
            try:
                mb = MobiBook(out)
                md1, md2 = mb.get_pid_meta_info()
                totalpids = get_pid_list(md1, md2, [serial_number], [])
                totalpids = list(set(totalpids))
                mb.make_drm_file(totalpids, out_dedrm)
            except Exception as e:
                logger.error("Dedrm failed for %s: %s", name, e)
    except Exception as e:
        logger.error(str(e))
        logger.error(f"Title: {title}, Asin: {asin} download failed")
def download_books(self, start_index=0, filetype="EBOK"):
# use default device
device = self.get_devices()[0]
self.device_serial_number = device["deviceSerialNumber"]
logger.info(
f"Using default device serial Number: {device['deviceSerialNumber']}"
)
books = self.get_all_books(filetype=filetype, start_index=start_index)
if start_index > 0:
print(f"resuming the download {start_index + 1}/{self.total_to_download}")
index = start_index
for book in books:
self.download_one_book(book, device, index, filetype)
index += 1
if self.not_done:
logger.error(
f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time"
)
else:
if not self.dedrm:
logger.info(
"\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools "
"(https://github.com/apprenticeharper/DeDRM_tools)\n"
"with the following serial number to remove DRM: "
+ device["deviceSerialNumber"]
)
else:
logger.info(
"All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`"
)
with open(os.path.join(self.out_dir, "key.txt"), "w") as f:
f.write(f"Key is: {device['deviceSerialNumber']}")

View File

@@ -0,0 +1,17 @@
import re
from kindle_download_helper.config import GITHUB_README_COMMENTS
def replace_readme_comments(file_name, comment_str, comments_name):
    """Replace the content of a named comment section inside a file.

    The section is the text between the START_SECTION and END_SECTION
    markers named *comments_name*; it is replaced by *comment_str* followed
    by a newline. The file is left unchanged when it has no such section.

    Args:
        file_name: path of the UTF-8 text file to rewrite in place.
        comment_str: new content of the section, inserted literally.
        comments_name: name used in the section markers.
    """
    pattern = GITHUB_README_COMMENTS.format(name=comments_name)

    def _fill_section(match):
        # Assemble the replacement by hand: handing comment_str to re.sub
        # as a template would interpret backslashes and group references
        # (for example in a book title) instead of inserting them verbatim.
        return f"{match.group(1)}{comment_str}\n{match.group(3)}"

    with open(file_name, "r+", encoding="UTF-8") as f:
        text = f.read()
        # regex sub from github readme comments
        text = re.sub(pattern, _fill_section, text, flags=re.DOTALL)
        f.seek(0)
        f.write(text)
        # drop leftovers when the new text is shorter than the old one
        f.truncate()

View File

@@ -1,56 +1,3 @@
<!--START_SECTION:my_kindle--> <!--START_SECTION:my_kindle-->
## My kindle stats
- I bought 38 books
- I pushed 871 docks
- My first book is 知乎周刊·商业的细节, bought on 2013年9月16日
- My first doc is 天涯头条:《刘军宁:文明社会与言论自由,事关社会安定和对权力的制约》, bought on 2015年3月16日
| ID | Title | Authors | Acquired | Read |
| ---- | ---- | ---- | ---- | ---- |
| 1 | 其主之声 | 斯坦尼斯瓦夫·莱姆 | 2022-5-3 | 2022-05-03 |
| 2 | 奇鸟行状录 | 村上春树 | 2022-3-22 | 2022-03-22 |
| 3 | Origin原型機-第03卷 | Boichi | 2021-6-1 | 2022-02-11 |
| 4 | Origin原型機-第05卷 | Boichi | 2021-6-1 | 2022-02-11 |
| 5 | Origin原型機-第07卷 | Boichi | 2021-6-1 | 2022-02-11 |
| 6 | 第一人称单数 | 村上春树 | 2021-11-28 | 2022-02-05 |
| 7 | 挽救计划 | 安迪·威尔 | 2021-11-18 | 2021-11-18 |
| 8 | [日本名家小说集](https://www.amazon.cn/dp/B08P8KXYZ1) | 东野圭吾, 伊坂幸太郎... | 2021-5-1 | 2021-11-17 |
| 9 | 两京十五日 | 马伯庸 | 2021-10-3 | 2021-10-05 |
| 10 | 绝叫 | 叶真中显 | 2021-8-17 | 2021-08-22 |
| 11 | 炎拳-第02卷 | 105965398155@vol.moe... | 2020-11-8 | 2021-07-24 |
| 12 | Origin原型機-第02卷 | Boichi | 2021-6-1 | 2021-06-04 |
| 13 | Origin原型機-第01卷 | Boichi | 2021-6-1 | 2021-06-03 |
| 14 | 殺手寓言-第15卷 | 南勝久 | 2021-4-9 | 2021-04-10 |
| 15 | 殺手寓言-第14卷 | 南勝久 | 2021-4-6 | 2021-04-09 |
| 16 | 殺手寓言-第13卷 | 南勝久 | 2021-4-6 | 2021-04-09 |
| 17 | 殺手寓言-第12卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
| 18 | 殺手寓言-第11卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
| 19 | 殺手寓言-第10卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
| 20 | 殺手寓言-第09卷 | 南勝久 | 2021-4-6 | 2021-04-07 |
| 21 | 殺手寓言-第08卷 | 南勝久 | 2021-4-6 | 2021-04-07 |
| 22 | 殺手寓言-第07卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
| 23 | 殺手寓言-第06卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
| 24 | 殺手寓言-第05卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
| 25 | 殺手寓言-第04卷 | 南勝久 | 2021-4-4 | 2021-04-04 |
| 26 | 殺手寓言-第03卷 | 南勝久 | 2021-3-31 | 2021-04-03 |
| 27 | 殺手寓言-第02卷 | 南勝久 | 2021-3-31 | 2021-04-02 |
| 28 | 殺手寓言-第01卷 | 南勝久 | 2021-3-31 | 2021-04-01 |
| 29 | 夏日重現-第12卷 | 田中靖規 | 2021-2-19 | 2021-03-06 |
| 30 | 夏日重現-第11卷 | 田中靖規 | 2021-2-19 | 2021-03-05 |
| 31 | 夏日重現-第10卷 | 田中靖規 | 2021-2-19 | 2021-03-04 |
| 32 | 夏日重現-第09卷 | 田中靖規 | 2021-2-19 | 2021-03-04 |
| 33 | 夏日重現-第08卷 | 田中靖規 | 2021-2-19 | 2021-02-28 |
| 34 | 夏日重現-第07卷 | 田中靖規 | 2021-2-19 | 2021-02-28 |
| 35 | 夏日重現-第06卷 | 田中靖規 | 2021-2-19 | 2021-02-26 |
| 36 | 夏日重現-第05卷 | 田中靖規 | 2021-2-19 | 2021-02-23 |
| 37 | 夏日重現-第04卷 | 田中靖規 | 2021-2-19 | 2021-02-21 |
| 38 | 夏日重現-第03卷 | 田中靖規 | 2021-2-19 | 2021-02-21 |
| 39 | 夏日重現-第02卷 | 田中靖規 | 2021-2-19 | 2021-02-19 |
| 40 | 夏日重現-第01卷 | 田中靖規 | 2021-2-19 | 2021-02-19 |
| 41 | 1984 | George Orwell... | 2015-7-1 | 2021-02-18 |
| 42 | 炎拳-第01卷 | 105965398155@vol.moe... | 2020-11-8 | 2020-11-10 |
| 43 | 1984 | George Orwell... | 2015-7-1 | 2020-04-25 |
| 44 | 锦衣之下 | 蓝色狮 | 2020-1-7 | 2020-01-27 |
<!--END_SECTION:my_kindle--> <!--END_SECTION:my_kindle-->

23
setup.py Normal file
View File

@@ -0,0 +1,23 @@
from setuptools import find_packages, setup
# Package metadata and install configuration handed to setuptools.
setup(
name="kindle_download",
author="yihong0618",
author_email="zouzou0208@gmail.com",
url="https://github.com/yihong0618/kindle_download_helper",
license="GPL V3",
version="1.1.1",
description="Download all your kindle books and `DeDRM` script.",
long_description="Download all your kindle books and `DeDRM` script.",
# every package found by find_packages() is shipped, including package data
packages=find_packages(),
include_package_data=True,
install_requires=[
"requests",
"browser-cookie3",
"faker",
# environment marker: pywin32 is only required on Windows
"pywin32 ; sys_platform == 'win32'"
],
# installs a `kindle_download` command that calls kindle_download_helper.cli:main
entry_points={
"console_scripts": ["kindle_download = kindle_download_helper.cli:main"],
},
)

View File

@@ -300,6 +300,6 @@ class Ui_MainDialog(object):
self.label_6.setText(QCoreApplication.translate("MainDialog", u"\u9690\u79c1\u58f0\u660e\uff1a\u6211\u4eec\u4e0d\u4f1a\u6536\u96c6\u4efb\u4f55\u7528\u6237\u4fe1\u606f\uff0c\u8bf7\u653e\u5fc3\u4f7f\u7528", None)) self.label_6.setText(QCoreApplication.translate("MainDialog", u"\u9690\u79c1\u58f0\u660e\uff1a\u6211\u4eec\u4e0d\u4f1a\u6536\u96c6\u4efb\u4f55\u7528\u6237\u4fe1\u606f\uff0c\u8bf7\u653e\u5fc3\u4f7f\u7528", None))
self.label_3.setText(QCoreApplication.translate("MainDialog", u"Copyright 2022 \u00a9 [yihong0618](https://github.com/yihong0618) and [frostming](https://github.com/frostming)", None)) self.label_3.setText(QCoreApplication.translate("MainDialog", u"Copyright 2022 \u00a9 [yihong0618](https://github.com/yihong0618) and [frostming](https://github.com/frostming)", None))
self.label_4.setText(QCoreApplication.translate("MainDialog", u"GitHub: <https://github.com/yihong0618/Kindle_download_helper>", None)) self.label_4.setText(QCoreApplication.translate("MainDialog", u"GitHub: <https://github.com/yihong0618/Kindle_download_helper>", None))
self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: MIT", None)) self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: GPL V3", None))
# retranslateUi # retranslateUi