mirror of
https://github.com/yihong0618/Kindle_download_helper.git
synced 2025-11-22 07:59:04 +08:00
feat: refactor and pypi
This commit is contained in:
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
|||||||
MIT License
|
|
||||||
|
|
||||||
Copyright (c) 2022 yihong, frostming and contributors
|
|
||||||
|
|
||||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
of this software and associated documentation files (the "Software"), to deal
|
|
||||||
in the Software without restriction, including without limitation the rights
|
|
||||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
copies of the Software, and to permit persons to whom the Software is
|
|
||||||
furnished to do so, subject to the following conditions:
|
|
||||||
|
|
||||||
The above copyright notice and this permission notice shall be included in all
|
|
||||||
copies or substantial portions of the Software.
|
|
||||||
|
|
||||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
SOFTWARE.
|
|
||||||
19
README.md
19
README.md
@@ -1,6 +1,6 @@
|
|||||||
# Kindle_download_helper
|
# Kindle_download_helper
|
||||||
|
|
||||||
Download all your kindle books script.
|
Download all your kindle books and `DeDRM` script.
|
||||||
<img width="1661" alt="image" src="https://user-images.githubusercontent.com/15976103/172113700-7be0ae1f-1aae-4b50-8377-13047c63411b.png">
|
<img width="1661" alt="image" src="https://user-images.githubusercontent.com/15976103/172113700-7be0ae1f-1aae-4b50-8377-13047c63411b.png">
|
||||||
|
|
||||||
## 安装 Kindle_download_helper
|
## 安装 Kindle_download_helper
|
||||||
@@ -17,11 +17,15 @@ Download all your kindle books script.
|
|||||||
|
|
||||||
- Mac 新手指南 by @chongiscool,见 [#76](https://github.com/yihong0618/Kindle_download_helper/issues/76)
|
- Mac 新手指南 by @chongiscool,见 [#76](https://github.com/yihong0618/Kindle_download_helper/issues/76)
|
||||||
|
|
||||||
|
|
||||||
### Cli 安装使用
|
### Cli 安装使用
|
||||||
|
|
||||||
1. python3
|
1. python3
|
||||||
2. requirements
|
2. requirements
|
||||||
|
|
||||||
|
or just pip
|
||||||
|
pip3 install kindle_download
|
||||||
|
|
||||||
```python
|
```python
|
||||||
python3 --version #查看 python 版本
|
python3 --version #查看 python 版本
|
||||||
```
|
```
|
||||||
@@ -36,6 +40,7 @@ pip3 install -r requirements.txt
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
python3 kindle.py --h #查看使用参数
|
python3 kindle.py --h #查看使用参数
|
||||||
|
kindle_download --h # pip
|
||||||
|
|
||||||
usage: kindle.py [-h] [--cookie COOKIE | --cookie-file COOKIE_FILE] [--cn] [--jp] [--de] [--resume-from INDEX]
|
usage: kindle.py [-h] [--cookie COOKIE | --cookie-file COOKIE_FILE] [--cn] [--jp] [--de] [--resume-from INDEX]
|
||||||
[--cut-length CUT_LENGTH] [-o OUTDIR] [-od OUTDEDRMDIR] [-s SESSION_FILE] [--pdoc] [--resolve_duplicate_names]
|
[--cut-length CUT_LENGTH] [-o OUTDIR] [-od OUTDEDRMDIR] [-s SESSION_FILE] [--pdoc] [--resolve_duplicate_names]
|
||||||
@@ -66,7 +71,7 @@ options:
|
|||||||
--resolve_duplicate_names
|
--resolve_duplicate_names
|
||||||
Resolve duplicate names files to download
|
Resolve duplicate names files to download
|
||||||
--readme If you want to generate kindle readme stats
|
--readme If you want to generate kindle readme stats
|
||||||
--dedrm If you want to `dedrm` directly
|
--dedrm If you want to `DeDRM` directly
|
||||||
--list just list books/pdoc, not to download
|
--list just list books/pdoc, not to download
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -77,6 +82,8 @@ options:
|
|||||||
|
|
||||||
```python
|
```python
|
||||||
python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
|
python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
|
||||||
|
or
|
||||||
|
kindle_download --dedrm --cn
|
||||||
```
|
```
|
||||||
|
|
||||||
(推荐) 手动输入 cookie、csrfToken 进行下载
|
(推荐) 手动输入 cookie、csrfToken 进行下载
|
||||||
@@ -84,6 +91,9 @@ python3 kindle.py --dedrm --cn ## --dedrm 移除 DRM
|
|||||||
```python
|
```python
|
||||||
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
|
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
|
||||||
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
|
python3 kindle.py ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
|
||||||
|
or
|
||||||
|
kindle_download ${csrfToken} --cookie ${cookie} --dedrm --cn #下载国区 Kindle 书籍并移除 DRM
|
||||||
|
kindle_download ${csrfToken} --cookie ${cookie} --dedrm #下载美区 Kindle 书籍
|
||||||
```
|
```
|
||||||
|
|
||||||
### 获取 cookie
|
### 获取 cookie
|
||||||
@@ -154,7 +164,7 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken}
|
|||||||
|
|
||||||
- cookie 和 csrf token 会过期,重新刷新下 amazon 的页面就行
|
- cookie 和 csrf token 会过期,重新刷新下 amazon 的页面就行
|
||||||
- 程序会自动在命令执行的目录下创建 `DOWNLOADS` 目录,书会下载在 `DOWNLOADS` 里
|
- 程序会自动在命令执行的目录下创建 `DOWNLOADS` 目录,书会下载在 `DOWNLOADS` 里
|
||||||
- 支持 mobi 类型的文件直接 dedrm `--dedrm` 生成的文件在 `DEDRMS` 里
|
- 支持 mobi 类型的文件直接 DeDRM `--dedrm` 生成的文件在 `DEDRMS` 里
|
||||||
- 如果你用 [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) 解密 key 存在 key.txt 里
|
- 如果你用 [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) 解密 key 存在 key.txt 里
|
||||||
- 或者直接拖进 Calibre 里 please google it.
|
- 或者直接拖进 Calibre 里 please google it.
|
||||||
- 如果过程中失败了可以使用 e.g. `--resume-from ${num}`
|
- 如果过程中失败了可以使用 e.g. `--resume-from ${num}`
|
||||||
@@ -169,7 +179,8 @@ python3 kindle.py --cn --cookie ${cookie} ${csrfToken}
|
|||||||
|
|
||||||
- The cookie and csrf token will expire, just refresh the amazon page again.
|
- The cookie and csrf token will expire, just refresh the amazon page again.
|
||||||
- The program will automatically create `DOWNLOADS` directory under the command execution directory, the book will be downloaded in `DOWNLOADS` directory.
|
- The program will automatically create `DOWNLOADS` directory under the command execution directory, the book will be downloaded in `DOWNLOADS` directory.
|
||||||
- If you use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to decrypt the key, it will be stored in key.txt
|
- Support DeDRM with `--dedrm`
|
||||||
|
- or use [DeDRM_tools](https://github.com/apprenticeharper/DeDRM_tools) to decrypt the key, it will be stored in key.txt
|
||||||
- or just drag it into Calibre. Please google it.
|
- or just drag it into Calibre. Please google it.
|
||||||
- If the process fails you can use e.g. `--resume-from ${num}`
|
- If the process fails you can use e.g. `--resume-from ${num}`
|
||||||
- If the name is too long, you can add: `-cut-length 80` to truncate the file name
|
- If the name is too long, you can add: `-cut-length 80` to truncate the file name
|
||||||
|
|||||||
@@ -1816,7 +1816,7 @@ qt_resource_struct = b"\
|
|||||||
\x00\x00\x00\x0e\x00\x02\x00\x00\x00\x01\x00\x00\x00\x03\
|
\x00\x00\x00\x0e\x00\x02\x00\x00\x00\x01\x00\x00\x00\x03\
|
||||||
\x00\x00\x00\x00\x00\x00\x00\x00\
|
\x00\x00\x00\x00\x00\x00\x00\x00\
|
||||||
\x00\x00\x00$\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
|
\x00\x00\x00$\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\
|
||||||
\x00\x00\x01\x82\x0f!\xe2\xc8\
|
\x00\x00\x01\x82X\xe5\xc8\x87\
|
||||||
"
|
"
|
||||||
|
|
||||||
def qInitResources():
|
def qInitResources():
|
||||||
|
|||||||
690
kindle.py
690
kindle.py
@@ -1,691 +1,5 @@
|
|||||||
"""
|
from kindle_download_helper import main
|
||||||
Note some download code from: https://github.com/sghctoma/bOOkp
|
|
||||||
Great Thanks
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import atexit
|
|
||||||
import html
|
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import pickle
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import urllib
|
|
||||||
from http.cookies import SimpleCookie
|
|
||||||
|
|
||||||
import requests
|
|
||||||
import urllib3
|
|
||||||
from faker import Faker
|
|
||||||
from requests.adapters import HTTPAdapter
|
|
||||||
|
|
||||||
from dedrm import MobiBook, get_pid_list
|
|
||||||
|
|
||||||
try:
|
|
||||||
import browser_cookie3
|
|
||||||
except ModuleNotFoundError:
|
|
||||||
print("not found browser_cookie3 here, you should use --cookie command")
|
|
||||||
|
|
||||||
logger = logging.getLogger("kindle")
|
|
||||||
fh = logging.FileHandler(".error_books.log")
|
|
||||||
fh.setLevel(logging.ERROR)
|
|
||||||
logger.addHandler(fh)
|
|
||||||
|
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
|
||||||
|
|
||||||
DEFAULT_OUT_DIR = "DOWNLOADS"
|
|
||||||
DEFAULT_OUT_DEDRM_DIR = "DEDRMS"
|
|
||||||
DEFAULT_SESSION_FILE = ".kindle_session"
|
|
||||||
|
|
||||||
|
|
||||||
KINDLE_HEADER = {
|
|
||||||
"User-Agent": Faker().user_agent(),
|
|
||||||
}
|
|
||||||
|
|
||||||
CONTENT_TYPES = {
|
|
||||||
"EBOK": "Ebook",
|
|
||||||
"PDOC": "KindlePDoc",
|
|
||||||
}
|
|
||||||
|
|
||||||
KINDLE_URLS = {
|
|
||||||
"cn": {
|
|
||||||
"bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll",
|
|
||||||
"download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}&authPool=AmazonCN",
|
|
||||||
"payload": "https://www.amazon.cn/hz/mycd/ajax",
|
|
||||||
"insights": "https://www.amazon.cn/kindle/reading/insights/data",
|
|
||||||
"book_url": "https://www.amazon.cn/dp/{book_id}",
|
|
||||||
},
|
|
||||||
"jp": {
|
|
||||||
"bookall": "https://www.amazon.jp/hz/mycd/myx#/home/content/booksAll",
|
|
||||||
"download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
|
|
||||||
"payload": "https://www.amazon.co.jp/hz/mycd/ajax",
|
|
||||||
"insights": "https://www.amazon.co.jp/kindle/reading/insights/data",
|
|
||||||
"book_url": "https://www.amazon.co.jp/dp/{book_id}",
|
|
||||||
},
|
|
||||||
"de": {
|
|
||||||
"bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll",
|
|
||||||
"download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
|
|
||||||
"payload": "https://www.amazon.de/hz/mycd/ajax",
|
|
||||||
"insights": "https://www.amazon.de/kindle/reading/insights/data",
|
|
||||||
"book_url": "https://www.amazon.de/dp/{book_id}",
|
|
||||||
},
|
|
||||||
"com": {
|
|
||||||
"bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll",
|
|
||||||
"download": "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}",
|
|
||||||
"payload": "https://www.amazon.com/hz/mycd/ajax",
|
|
||||||
"insights": "https://www.amazon.com/kindle/reading/insights/data",
|
|
||||||
"book_url": "https://www.amazon.com/dp/{book_id}",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
# for kindle stats
|
|
||||||
GITHUB_README_COMMENTS = (
|
|
||||||
"(<!--START_SECTION:{name}-->\n)(.*)(<!--END_SECTION:{name}-->\n)"
|
|
||||||
)
|
|
||||||
MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n"
|
|
||||||
MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n \
|
|
||||||
- I pushed {pdocs_len} docks\n \
|
|
||||||
- My first book is {first_book_title}, bought on {first_book_bought_date}\n \
|
|
||||||
- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n"
|
|
||||||
|
|
||||||
KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
|
|
||||||
KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"
|
|
||||||
|
|
||||||
|
|
||||||
def replace_readme_comments(file_name, comment_str, comments_name):
|
|
||||||
with open(file_name, "r+", encoding="UTF-8") as f:
|
|
||||||
text = f.read()
|
|
||||||
# regrex sub from github readme comments
|
|
||||||
text = re.sub(
|
|
||||||
GITHUB_README_COMMENTS.format(name=comments_name),
|
|
||||||
r"\1{}\n\3".format(comment_str),
|
|
||||||
text,
|
|
||||||
flags=re.DOTALL,
|
|
||||||
)
|
|
||||||
f.seek(0)
|
|
||||||
f.write(text)
|
|
||||||
f.truncate()
|
|
||||||
|
|
||||||
|
|
||||||
class Kindle:
|
|
||||||
def __init__(
|
|
||||||
self,
|
|
||||||
csrf_token=None,
|
|
||||||
domain="cn",
|
|
||||||
out_dir=DEFAULT_OUT_DIR,
|
|
||||||
out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
|
|
||||||
cut_length=100,
|
|
||||||
session_file=DEFAULT_SESSION_FILE,
|
|
||||||
):
|
|
||||||
self.urls = KINDLE_URLS[domain]
|
|
||||||
self._csrf_token = csrf_token
|
|
||||||
self.total_to_download = 0
|
|
||||||
self.out_dir = out_dir
|
|
||||||
self.out_dedrm_dir = out_dedrm_dir
|
|
||||||
self.dedrm = False
|
|
||||||
self.cut_length = cut_length
|
|
||||||
self.not_done = False
|
|
||||||
self.session_file = session_file
|
|
||||||
self.session = self.make_session()
|
|
||||||
self.is_browser_cookie = False
|
|
||||||
self.to_resolve_duplicate_names = False
|
|
||||||
self.books_info_dict = {}
|
|
||||||
self.file_type_list = ["EBOOK", "PDOC"]
|
|
||||||
atexit.register(self.dump_session)
|
|
||||||
|
|
||||||
def set_cookie(self, cookiejar):
|
|
||||||
if not cookiejar:
|
|
||||||
raise Exception("Please make sure your amazon cookie is right")
|
|
||||||
self.session.cookies.clear()
|
|
||||||
self.session.cookies.update(cookiejar)
|
|
||||||
|
|
||||||
def set_cookie_from_string(self, cookie_string):
|
|
||||||
cj = self._parse_kindle_cookie(cookie_string)
|
|
||||||
self.set_cookie(cj)
|
|
||||||
|
|
||||||
def dump_session(self):
|
|
||||||
with open(self.session_file, "wb") as f:
|
|
||||||
pickle.dump(self.session, f)
|
|
||||||
|
|
||||||
@property
|
|
||||||
def csrf_token(self):
|
|
||||||
if not self._csrf_token:
|
|
||||||
self._csrf_token = self._get_csrf_token()
|
|
||||||
return self._csrf_token
|
|
||||||
|
|
||||||
@csrf_token.setter
|
|
||||||
def csrf_token(self, csrf_token):
|
|
||||||
self._csrf_token = csrf_token
|
|
||||||
|
|
||||||
def ensure_session_cookie(self):
|
|
||||||
if not self.session.cookies:
|
|
||||||
logger.debug("No cookie found, trying to load from browsers")
|
|
||||||
try:
|
|
||||||
self.set_cookie(browser_cookie3.load(domain_name="amazon"))
|
|
||||||
except:
|
|
||||||
print("not found browser_cookie3 here, you should use --cookie command")
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _parse_kindle_cookie(kindle_cookie):
|
|
||||||
cookie = SimpleCookie()
|
|
||||||
cookie.load(kindle_cookie)
|
|
||||||
cookies_dict = {}
|
|
||||||
cookiejar = None
|
|
||||||
for key, morsel in cookie.items():
|
|
||||||
cookies_dict[key] = morsel.value
|
|
||||||
cookiejar = requests.utils.cookiejar_from_dict(
|
|
||||||
cookies_dict, cookiejar=None, overwrite=True
|
|
||||||
)
|
|
||||||
return cookiejar
|
|
||||||
|
|
||||||
def _get_csrf_token(self):
|
|
||||||
"""
|
|
||||||
TODO: I do not know why I have to get csrf token in the page not in this way
|
|
||||||
maybe figure out why in the future
|
|
||||||
"""
|
|
||||||
r = self.session.get(self.urls["bookall"])
|
|
||||||
match = re.search(r'var csrfToken = "(.*)";', r.text)
|
|
||||||
if not match:
|
|
||||||
self.revoke_cookie_token(open_page=self.is_browser_cookie)
|
|
||||||
raise Exception(
|
|
||||||
"Can't get the csrf token, "
|
|
||||||
f"please refresh the page at {self.urls['bookall']} and retry"
|
|
||||||
)
|
|
||||||
return match.group(1)
|
|
||||||
|
|
||||||
def refresh_browser_cookie(self):
|
|
||||||
import webbrowser
|
|
||||||
|
|
||||||
try:
|
|
||||||
webbrowser.open(self.urls["bookall"])
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def revoke_cookie_token(self, open_page=False):
|
|
||||||
# help user open it directly.
|
|
||||||
logger.info(
|
|
||||||
"Opening the url to get cookie...You can wait for the page to finish loading and retry"
|
|
||||||
)
|
|
||||||
self._csrf_token = None # reset the token
|
|
||||||
# clear the cookies so the next time it can be reloaded from the browsers
|
|
||||||
self.session.cookies.clear()
|
|
||||||
if open_page:
|
|
||||||
self.refresh_browser_cookie()
|
|
||||||
|
|
||||||
def ensure_cookie_token(self):
|
|
||||||
if not self._csrf_token:
|
|
||||||
if not self.session.cookies:
|
|
||||||
self.refresh_browser_cookie()
|
|
||||||
self.ensure_session_cookie()
|
|
||||||
self._csrf_token = self._get_csrf_token()
|
|
||||||
logger.debug(
|
|
||||||
f"session-id: { self.session.cookies.get_dict().get('session-id') }"
|
|
||||||
)
|
|
||||||
|
|
||||||
def make_session(self):
|
|
||||||
if os.path.exists(self.session_file):
|
|
||||||
with open(self.session_file, "rb") as f:
|
|
||||||
session = pickle.load(f)
|
|
||||||
else:
|
|
||||||
session = requests.Session()
|
|
||||||
session.headers.update(KINDLE_HEADER)
|
|
||||||
session.mount(
|
|
||||||
# will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for
|
|
||||||
# (413, 429, 503) statuses
|
|
||||||
"https://",
|
|
||||||
HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)),
|
|
||||||
)
|
|
||||||
|
|
||||||
logger.debug(f"user-agent: { session.headers.get('User-Agent') }")
|
|
||||||
return session
|
|
||||||
|
|
||||||
def get_devices(self):
|
|
||||||
"""
|
|
||||||
This method must be called before each download, so we ensure
|
|
||||||
the session cookies before it is called
|
|
||||||
"""
|
|
||||||
self.ensure_cookie_token()
|
|
||||||
|
|
||||||
payload = {"param": {"GetDevices": {}}}
|
|
||||||
r = self.session.post(
|
|
||||||
self.urls["payload"],
|
|
||||||
data={
|
|
||||||
"data": json.dumps(payload),
|
|
||||||
"csrfToken": self.csrf_token,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
r.raise_for_status()
|
|
||||||
devices = r.json()
|
|
||||||
if devices.get("error"):
|
|
||||||
self.revoke_cookie_token(open_page=True)
|
|
||||||
raise Exception(
|
|
||||||
f"Error: {devices.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie"
|
|
||||||
)
|
|
||||||
devices = r.json()["GetDevices"]["devices"]
|
|
||||||
# sleep get device first time.
|
|
||||||
logger.info("Amazon open their bot check will sleep 3s")
|
|
||||||
time.sleep(3)
|
|
||||||
if not devices:
|
|
||||||
raise Exception("No devices are bound to this account")
|
|
||||||
return [device for device in devices if "deviceSerialNumber" in device]
|
|
||||||
|
|
||||||
def get_all_books(self, start_index=0, filetype="EBOK"):
|
|
||||||
"""
|
|
||||||
TODO: refactor this function
|
|
||||||
"""
|
|
||||||
# some info
|
|
||||||
if filetype == "PDOC":
|
|
||||||
logger.info(
|
|
||||||
"It will take some time to get all PDOC books list, please wait"
|
|
||||||
)
|
|
||||||
startIndex = start_index
|
|
||||||
batchSize = 100
|
|
||||||
payload = {
|
|
||||||
"param": {
|
|
||||||
"OwnershipData": {
|
|
||||||
"sortOrder": "DESCENDING",
|
|
||||||
"sortIndex": "DATE",
|
|
||||||
"startIndex": startIndex,
|
|
||||||
"batchSize": batchSize,
|
|
||||||
"contentType": CONTENT_TYPES[filetype],
|
|
||||||
"itemStatus": ["Active"],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if filetype == "EBOK":
|
|
||||||
payload["param"]["OwnershipData"].update(
|
|
||||||
{
|
|
||||||
"originType": ["Purchase"],
|
|
||||||
}
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
batchSize = 18
|
|
||||||
payload["param"]["OwnershipData"].update(
|
|
||||||
{
|
|
||||||
"batchSize": batchSize,
|
|
||||||
"isExtendedMYK": False,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
books = []
|
|
||||||
### added by yihong0618 2022.06.27
|
|
||||||
### this ugly code is for amazon open their bot check
|
|
||||||
### if the bot check close
|
|
||||||
### will delete the try and try code
|
|
||||||
break_times = 0
|
|
||||||
while True:
|
|
||||||
# anyway sleep 0.5
|
|
||||||
time.sleep(0.5)
|
|
||||||
r = self.session.post(
|
|
||||||
self.urls["payload"],
|
|
||||||
data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
|
|
||||||
)
|
|
||||||
# try three times for bot check
|
|
||||||
if r.status_code == 503:
|
|
||||||
# sleep and try again
|
|
||||||
sleep_seconds = 5 + 2 * break_times
|
|
||||||
time.sleep(sleep_seconds)
|
|
||||||
logger.info(
|
|
||||||
f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}"
|
|
||||||
)
|
|
||||||
if break_times < 7:
|
|
||||||
break_times += 1
|
|
||||||
r = self.session.post(
|
|
||||||
self.urls["payload"],
|
|
||||||
data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
|
|
||||||
)
|
|
||||||
if not r.ok:
|
|
||||||
if r.status_code == 503:
|
|
||||||
time.sleep(sleep_seconds)
|
|
||||||
logger.info(
|
|
||||||
f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}"
|
|
||||||
)
|
|
||||||
logger.info(f"Next time fail will break the loop")
|
|
||||||
r = self.session.post(
|
|
||||||
self.urls["payload"],
|
|
||||||
data={
|
|
||||||
"data": json.dumps(payload),
|
|
||||||
"csrfToken": self.csrf_token,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
break_times += 1
|
|
||||||
if not r.ok:
|
|
||||||
# amazon limit this api
|
|
||||||
if startIndex == 0:
|
|
||||||
logger.error(
|
|
||||||
"Amazon api limit when this download done.\n Please run it again`"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
self.not_done = True
|
|
||||||
logger.error(
|
|
||||||
"Amazon api limit when this download done.\n You can add command `--resume-from %s`",
|
|
||||||
startIndex,
|
|
||||||
)
|
|
||||||
break
|
|
||||||
result = r.json()
|
|
||||||
if not result.get("success", True):
|
|
||||||
logger.error("get all books error: %s", result.get("error"))
|
|
||||||
break
|
|
||||||
items = result["OwnershipData"]["items"]
|
|
||||||
for item in items:
|
|
||||||
if filetype == "PDOC":
|
|
||||||
item["title"] = html.unescape(item["title"])
|
|
||||||
item["authors"] = html.unescape(item.pop("author", ""))
|
|
||||||
if item.get("readStatus", "") == "READ":
|
|
||||||
self.books_info_dict[item["asin"]] = item
|
|
||||||
|
|
||||||
books.extend(items)
|
|
||||||
if not self.total_to_download:
|
|
||||||
self.total_to_download = result["OwnershipData"]["numberOfItems"]
|
|
||||||
|
|
||||||
if result["OwnershipData"]["hasMoreItems"]:
|
|
||||||
startIndex += batchSize
|
|
||||||
payload["param"]["OwnershipData"]["startIndex"] = startIndex
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
return books
|
|
||||||
|
|
||||||
def _get_reading_stats(self):
|
|
||||||
insights_url = self.urls["insights"]
|
|
||||||
r = self.session.get(insights_url)
|
|
||||||
if r.ok:
|
|
||||||
return r.json()
|
|
||||||
logger.error(f"Something is wrong get the stats data url: {insights_url}")
|
|
||||||
raise Exception(f"Something is wrong get the stats data url: {insights_url}")
|
|
||||||
|
|
||||||
def _make_one_book_stats_info(self, book_info):
|
|
||||||
book_url = self.urls["book_url"]
|
|
||||||
asin = book_info["asin"]
|
|
||||||
book = self.books_info_dict.get(asin)
|
|
||||||
book_title = book.get("title", "")
|
|
||||||
# filter the brackets in the book title
|
|
||||||
book_title = re.sub(
|
|
||||||
r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title
|
|
||||||
)
|
|
||||||
book_title = book_title.replace(" ", "")
|
|
||||||
if book.get("category", "") == "KindleEBook":
|
|
||||||
book_url = book_url.format(book_id=asin)
|
|
||||||
book_title = f"[{book_title}]({book_url})"
|
|
||||||
book_authors = book.get("authors")
|
|
||||||
if len(book_authors) > 10:
|
|
||||||
book_authors = ",".join(book_authors.split(",")[:2]) + "..."
|
|
||||||
# only keep date
|
|
||||||
read = book_info.get("date_read")[:10]
|
|
||||||
acquired = (
|
|
||||||
book.get("acquiredDate", "")
|
|
||||||
.replace("年", "-")
|
|
||||||
.replace("月", "-")
|
|
||||||
.replace("日", "")
|
|
||||||
)
|
|
||||||
return book_title, book_authors, acquired, read
|
|
||||||
|
|
||||||
def make_kindle_stats_readme(self):
|
|
||||||
reading_stats = self._get_reading_stats()
|
|
||||||
read_list = reading_stats.get("goal_info", {}).get("titles_read")
|
|
||||||
ebooks = self.get_all_books(filetype="EBOK")
|
|
||||||
pdocs = self.get_all_books(filetype="PDOC")
|
|
||||||
first_ebook, first_pdoc = ebooks[-1], pdocs[-1]
|
|
||||||
print(len(self.books_info_dict.keys()), first_ebook, first_pdoc)
|
|
||||||
print(read_list)
|
|
||||||
|
|
||||||
s = MY_KINDLE_STATS_INFO_HEAD
|
|
||||||
kindle_stats_str = MY_KINDLE_STATS_INFO.format(
|
|
||||||
books_len=len(ebooks),
|
|
||||||
pdocs_len=len(pdocs),
|
|
||||||
first_book_title=first_ebook["title"],
|
|
||||||
first_book_bought_date=first_ebook["acquiredDate"],
|
|
||||||
first_doc_title=first_pdoc["title"],
|
|
||||||
first_doc_push_date=first_pdoc["acquiredDate"],
|
|
||||||
)
|
|
||||||
s += kindle_stats_str
|
|
||||||
s += KINDLE_TABLE_HEAD
|
|
||||||
index = 1
|
|
||||||
for book_info in read_list:
|
|
||||||
book_title, book_authors, acquired, read = self._make_one_book_stats_info(
|
|
||||||
book_info
|
|
||||||
)
|
|
||||||
s += KINDLE_STAT_TEMPLATE.format(
|
|
||||||
id=str(index),
|
|
||||||
title=book_title,
|
|
||||||
authors=book_authors,
|
|
||||||
acquired=acquired,
|
|
||||||
read=read,
|
|
||||||
)
|
|
||||||
index += 1
|
|
||||||
replace_readme_comments("my_kindle_stats.md", s, "my_kindle")
|
|
||||||
|
|
||||||
def download_one_book(self, book, device, index, filetype="EBOK"):
|
|
||||||
title = book["title"]
|
|
||||||
asin = book["asin"]
|
|
||||||
try:
|
|
||||||
download_url = self.urls["download"].format(
|
|
||||||
filetype,
|
|
||||||
asin,
|
|
||||||
device["deviceSerialNumber"],
|
|
||||||
device["deviceType"],
|
|
||||||
device["customerId"],
|
|
||||||
)
|
|
||||||
r = self.session.get(download_url, verify=False, stream=True)
|
|
||||||
r.raise_for_status()
|
|
||||||
name = re.findall(
|
|
||||||
r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"]
|
|
||||||
)[0]
|
|
||||||
name = urllib.parse.unquote(name)
|
|
||||||
_, extname = os.path.splitext(name)
|
|
||||||
name = title + extname
|
|
||||||
name = re.sub(r'[\\/:*?"<>|]', "_", name)
|
|
||||||
|
|
||||||
##### if you have many duplicate name books #####
|
|
||||||
if self.to_resolve_duplicate_names:
|
|
||||||
name = f"{asin}_{name}"
|
|
||||||
if len(name) > self.cut_length:
|
|
||||||
name = name[: self.cut_length - 5] + name[-5:]
|
|
||||||
total_size = r.headers["Content-length"]
|
|
||||||
|
|
||||||
out = os.path.join(self.out_dir, name)
|
|
||||||
out_dedrm = os.path.join(self.out_dedrm_dir, name)
|
|
||||||
logger.info(
|
|
||||||
f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes"
|
|
||||||
)
|
|
||||||
with open(out, "wb") as f:
|
|
||||||
for chunk in r.iter_content(chunk_size=512):
|
|
||||||
f.write(chunk)
|
|
||||||
logger.info(f"{name} downloaded")
|
|
||||||
# for dedrm
|
|
||||||
if self.dedrm:
|
|
||||||
try:
|
|
||||||
mb = MobiBook(out)
|
|
||||||
md1, md2 = mb.get_pid_meta_info()
|
|
||||||
totalpids = get_pid_list(md1, md2, [self.device_serial_number], [])
|
|
||||||
totalpids = list(set(totalpids))
|
|
||||||
mb.make_drm_file(totalpids, out_dedrm)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error("Dedrm failed for %s: %s", name, e)
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(str(e))
|
|
||||||
logger.error(f"Title: {title}, Asin: {asin} download failed")
|
|
||||||
|
|
||||||
def download_books(self, start_index=0, filetype="EBOK"):
|
|
||||||
# use default device
|
|
||||||
device = self.get_devices()[0]
|
|
||||||
self.device_serial_number = device["deviceSerialNumber"]
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"Using default device serial Number: {device['deviceSerialNumber']}"
|
|
||||||
)
|
|
||||||
books = self.get_all_books(filetype=filetype, start_index=start_index)
|
|
||||||
if start_index > 0:
|
|
||||||
print(f"resuming the download {start_index + 1}/{self.total_to_download}")
|
|
||||||
index = start_index
|
|
||||||
for book in books:
|
|
||||||
self.download_one_book(book, device, index, filetype)
|
|
||||||
index += 1
|
|
||||||
if self.not_done:
|
|
||||||
logger.error(
|
|
||||||
f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time"
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
if not self.dedrm:
|
|
||||||
logger.info(
|
|
||||||
"\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools "
|
|
||||||
"(https://github.com/apprenticeharper/DeDRM_tools)\n"
|
|
||||||
"with the following serial number to remove DRM: "
|
|
||||||
+ device["deviceSerialNumber"]
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
logger.info(
|
|
||||||
"All done books saved in `DOWNLOAD`, dedrm files saved in `DEDRMS`"
|
|
||||||
)
|
|
||||||
with open(os.path.join(self.out_dir, "key.txt"), "w") as f:
|
|
||||||
f.write(f"Key is: {device['deviceSerialNumber']}")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))
|
|
||||||
|
|
||||||
logger.addHandler(logging.StreamHandler())
|
|
||||||
parser = argparse.ArgumentParser()
|
|
||||||
parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")
|
|
||||||
|
|
||||||
cookie_group = parser.add_mutually_exclusive_group()
|
|
||||||
cookie_group.add_argument(
|
|
||||||
"--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
|
|
||||||
)
|
|
||||||
cookie_group.add_argument(
|
|
||||||
"--cookie-file", dest="cookie_file", default="", help="load cookie local file"
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--cn",
|
|
||||||
dest="domain",
|
|
||||||
action="store_const",
|
|
||||||
const="cn",
|
|
||||||
default="com",
|
|
||||||
help="if your account is an amazon.cn account",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--jp",
|
|
||||||
dest="domain",
|
|
||||||
action="store_const",
|
|
||||||
const="jp",
|
|
||||||
default="com",
|
|
||||||
help="if your account is an amazon.jp account",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--de",
|
|
||||||
dest="domain",
|
|
||||||
action="store_const",
|
|
||||||
const="de",
|
|
||||||
default="com",
|
|
||||||
help="if your account is an amazon.de account",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--resume-from",
|
|
||||||
dest="index",
|
|
||||||
type=int,
|
|
||||||
default=1,
|
|
||||||
help="resume from the index if download failed",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--cut-length",
|
|
||||||
dest="cut_length",
|
|
||||||
type=int,
|
|
||||||
default=100,
|
|
||||||
help="truncate the file name",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-o", "--outdir", default=DEFAULT_OUT_DIR, help="dwonload output dir"
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-od",
|
|
||||||
"--outdedrmdir",
|
|
||||||
default=DEFAULT_OUT_DEDRM_DIR,
|
|
||||||
help="dwonload output dedrm dir",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"-s",
|
|
||||||
"--session-file",
|
|
||||||
default=DEFAULT_SESSION_FILE,
|
|
||||||
help="The reusable session dump file",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--pdoc",
|
|
||||||
dest="filetype",
|
|
||||||
action="store_const",
|
|
||||||
const="PDOC",
|
|
||||||
default="EBOK",
|
|
||||||
help="to download personal documents or ebook",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--resolve_duplicate_names",
|
|
||||||
dest="resolve_duplicate_names",
|
|
||||||
action="store_true",
|
|
||||||
help="Resolve duplicate names files to download",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--readme",
|
|
||||||
dest="readme",
|
|
||||||
action="store_true",
|
|
||||||
help="If you want to generate kindle readme stats",
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
"--dedrm",
|
|
||||||
dest="dedrm",
|
|
||||||
action="store_true",
|
|
||||||
help="If you want to `dedrm` directly",
|
|
||||||
)
|
|
||||||
|
|
||||||
parser.add_argument(
|
|
||||||
"--list",
|
|
||||||
dest="list_only",
|
|
||||||
action="store_true",
|
|
||||||
help="just list books/pdoc, not to download",
|
|
||||||
)
|
|
||||||
|
|
||||||
options = parser.parse_args()
|
|
||||||
|
|
||||||
if not os.path.exists(options.outdir):
|
|
||||||
os.makedirs(options.outdir)
|
|
||||||
# for dedrm
|
|
||||||
if not os.path.exists(options.outdedrmdir):
|
|
||||||
os.makedirs(options.outdedrmdir)
|
|
||||||
kindle = Kindle(
|
|
||||||
options.csrf_token,
|
|
||||||
options.domain,
|
|
||||||
options.outdir,
|
|
||||||
options.outdedrmdir,
|
|
||||||
options.cut_length,
|
|
||||||
session_file=options.session_file,
|
|
||||||
)
|
|
||||||
# other args
|
|
||||||
kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
|
|
||||||
kindle.dedrm = options.dedrm
|
|
||||||
|
|
||||||
if options.cookie_file:
|
|
||||||
with open(options.cookie_file, "r") as f:
|
|
||||||
kindle.set_cookie_from_string(f.read())
|
|
||||||
elif options.cookie:
|
|
||||||
kindle.set_cookie_from_string(options.cookie)
|
|
||||||
else:
|
|
||||||
kindle.is_browser_cookie = True
|
|
||||||
|
|
||||||
if options.list_only:
|
|
||||||
kindle.get_devices()
|
|
||||||
print(
|
|
||||||
json.dumps(
|
|
||||||
kindle.get_all_books(filetype=options.filetype),
|
|
||||||
indent=4,
|
|
||||||
ensure_ascii=False,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
exit()
|
|
||||||
|
|
||||||
if options.readme:
|
|
||||||
# generate readme stats
|
|
||||||
kindle.make_kindle_stats_readme()
|
|
||||||
else:
|
|
||||||
kindle.download_books(start_index=options.index - 1, filetype=options.filetype)
|
|
||||||
|
|||||||
@@ -340,7 +340,7 @@ hr { height: 1px; border-width: 0; }
|
|||||||
<item>
|
<item>
|
||||||
<widget class="QLabel" name="label_5">
|
<widget class="QLabel" name="label_5">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>License: MIT</string>
|
<string>License: GPL V3</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ from typing import NamedTuple
|
|||||||
|
|
||||||
from PySide6 import QtCore, QtGui, QtWidgets
|
from PySide6 import QtCore, QtGui, QtWidgets
|
||||||
|
|
||||||
import kindle
|
from kindle_download_helper import kindle as kindle
|
||||||
from ui_kindle import Ui_MainDialog
|
from ui_kindle import Ui_MainDialog
|
||||||
|
|
||||||
logger = logging.getLogger("kindle")
|
logger = logging.getLogger("kindle")
|
||||||
|
|||||||
2
kindle_download_helper/__init__.py
Normal file
2
kindle_download_helper/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
from kindle_download_helper.cli import main
|
||||||
|
from kindle_download_helper import kindle
|
||||||
1
kindle_download_helper/__main__.py
Normal file
1
kindle_download_helper/__main__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
"""Entry point for ``python -m kindle_download_helper``."""
# Use the package-qualified import: when run with ``-m`` the package directory
# itself is not on ``sys.path``, so a bare ``from cli import main`` cannot
# resolve.
from kindle_download_helper.cli import main

# Importing ``main`` alone does nothing; it has to be invoked.
if __name__ == "__main__":
    main()
|
||||||
170
kindle_download_helper/cli.py
Normal file
170
kindle_download_helper/cli.py
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
from kindle_download_helper.kindle import Kindle
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import urllib3
|
||||||
|
import logging
|
||||||
|
import json
|
||||||
|
|
||||||
|
from kindle_download_helper.config import (
|
||||||
|
DEFAULT_OUT_DIR,
|
||||||
|
DEFAULT_SESSION_FILE,
|
||||||
|
DEFAULT_OUT_DEDRM_DIR,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger("kindle")

# kindle_download_helper.kindle (imported above) attaches its own ERROR-level
# FileHandler for ".error_books.log" to this same "kindle" logger.  Adding a
# second one here would write every error line twice, so reuse an existing
# handler for that file when there is one.
fh = next(
    (
        handler
        for handler in logger.handlers
        if isinstance(handler, logging.FileHandler)
        and os.path.basename(handler.baseFilename) == ".error_books.log"
    ),
    None,
)
if fh is None:
    fh = logging.FileHandler(".error_books.log")
    fh.setLevel(logging.ERROR)
    logger.addHandler(fh)

# Downloads are requested with verify=False, which would otherwise emit an
# InsecureRequestWarning per request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Parse the command line, then list, summarise, or download Kindle content.

    Behaviour by flag:

    * ``--list``   -- print all books/pdocs as JSON and return.
    * ``--readme`` -- regenerate the kindle stats markdown.
    * otherwise    -- download every item, starting at ``--resume-from``.

    Cookies come from ``--cookie-file``, then ``--cookie``; with neither, the
    ``Kindle`` object is told to read them from the local browser.
    """
    logger.setLevel(os.environ.get("LOGGING_LEVEL", "INFO"))
    logger.addHandler(logging.StreamHandler())

    parser = argparse.ArgumentParser()
    parser.add_argument("csrf_token", help="amazon or amazon cn csrf token", nargs="?")

    cookie_group = parser.add_mutually_exclusive_group()
    cookie_group.add_argument(
        "--cookie", dest="cookie", default="", help="amazon or amazon cn cookie"
    )
    cookie_group.add_argument(
        "--cookie-file", dest="cookie_file", default="", help="load cookie local file"
    )

    # One store_const flag per supported non-default marketplace; all of them
    # write to ``domain``, which stays "com" when no flag is given.
    for domain in ("cn", "jp", "de"):
        parser.add_argument(
            f"--{domain}",
            dest="domain",
            action="store_const",
            const=domain,
            default="com",
            help=f"if your account is an amazon.{domain} account",
        )

    parser.add_argument(
        "--resume-from",
        dest="index",
        type=int,
        default=1,
        help="resume from the index if download failed",
    )
    parser.add_argument(
        "--cut-length",
        dest="cut_length",
        type=int,
        default=100,
        help="truncate the file name",
    )
    parser.add_argument(
        "-o", "--outdir", default=DEFAULT_OUT_DIR, help="download output dir"
    )
    parser.add_argument(
        "-od",
        "--outdedrmdir",
        default=DEFAULT_OUT_DEDRM_DIR,
        help="download output dedrm dir",
    )
    parser.add_argument(
        "-s",
        "--session-file",
        default=DEFAULT_SESSION_FILE,
        help="The reusable session dump file",
    )
    parser.add_argument(
        "--pdoc",
        dest="filetype",
        action="store_const",
        const="PDOC",
        default="EBOK",
        help="to download personal documents or ebook",
    )
    parser.add_argument(
        "--resolve_duplicate_names",
        dest="resolve_duplicate_names",
        action="store_true",
        help="Resolve duplicate names files to download",
    )
    parser.add_argument(
        "--readme",
        dest="readme",
        action="store_true",
        help="If you want to generate kindle readme stats",
    )
    parser.add_argument(
        "--dedrm",
        dest="dedrm",
        action="store_true",
        help="If you want to `dedrm` directly",
    )
    parser.add_argument(
        "--list",
        dest="list_only",
        action="store_true",
        help="just list books/pdoc, not to download",
    )

    options = parser.parse_args()

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(options.outdir, exist_ok=True)
    # for dedrm
    os.makedirs(options.outdedrmdir, exist_ok=True)

    kindle = Kindle(
        options.csrf_token,
        options.domain,
        options.outdir,
        options.outdedrmdir,
        options.cut_length,
        session_file=options.session_file,
    )
    # other args
    kindle.to_resolve_duplicate_names = options.resolve_duplicate_names
    kindle.dedrm = options.dedrm

    if options.cookie_file:
        with open(options.cookie_file, "r") as f:
            kindle.set_cookie_from_string(f.read())
    elif options.cookie:
        kindle.set_cookie_from_string(options.cookie)
    else:
        kindle.is_browser_cookie = True

    if options.list_only:
        kindle.get_devices()
        print(
            json.dumps(
                kindle.get_all_books(filetype=options.filetype),
                indent=4,
                ensure_ascii=False,
            )
        )
        # Plain return instead of the site-provided exit() helper, which is
        # not guaranteed to exist (e.g. ``python -S`` or frozen builds).
        return

    if options.readme:
        # generate readme stats
        kindle.make_kindle_stats_readme()
    else:
        # --resume-from is 1-based on the command line, 0-based internally.
        kindle.download_books(start_index=options.index - 1, filetype=options.filetype)


if __name__ == "__main__":
    main()
|
||||||
56
kindle_download_helper/config.py
Normal file
56
kindle_download_helper/config.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
from faker import Faker
|
||||||
|
|
||||||
|
# Default locations used when the caller does not override them.
DEFAULT_OUT_DIR = "DOWNLOADS"
DEFAULT_OUT_DEDRM_DIR = "DEDRMS"
DEFAULT_SESSION_FILE = ".kindle_session"


# Headers applied to every new requests session; the user agent is generated
# once, at import time.
KINDLE_HEADER = {
    "User-Agent": Faker().user_agent(),
}

# Maps the CLI file type to the ``contentType`` value sent in the payload.
CONTENT_TYPES = {
    "EBOK": "Ebook",
    "PDOC": "KindlePDoc",
}

# Download endpoint shared by every marketplace.  Positional fields, in order:
# type, key (asin), fsn (device serial number), device_type, customerId.
_DOWNLOAD_URL = "https://cde-ta-g7g.amazon.com/FionaCDEServiceEngine/FSDownloadContent?type={}&key={}&fsn={}&device_type={}&customerId={}"

# Per-marketplace endpoints, keyed by the ``domain`` value chosen on the CLI.
KINDLE_URLS = {
    "cn": {
        "bookall": "https://www.amazon.cn/hz/mycd/myx#/home/content/booksAll",
        "download": _DOWNLOAD_URL + "&authPool=AmazonCN",
        "payload": "https://www.amazon.cn/hz/mycd/ajax",
        "insights": "https://www.amazon.cn/kindle/reading/insights/data",
        "book_url": "https://www.amazon.cn/dp/{book_id}",
    },
    "jp": {
        # Host aligned with the other jp endpoints (amazon.co.jp); this entry
        # previously pointed at amazon.jp.
        "bookall": "https://www.amazon.co.jp/hz/mycd/myx#/home/content/booksAll",
        "download": _DOWNLOAD_URL,
        "payload": "https://www.amazon.co.jp/hz/mycd/ajax",
        "insights": "https://www.amazon.co.jp/kindle/reading/insights/data",
        "book_url": "https://www.amazon.co.jp/dp/{book_id}",
    },
    "de": {
        "bookall": "https://www.amazon.de/hz/mycd/myx#/home/content/booksAll",
        "download": _DOWNLOAD_URL,
        "payload": "https://www.amazon.de/hz/mycd/ajax",
        "insights": "https://www.amazon.de/kindle/reading/insights/data",
        "book_url": "https://www.amazon.de/dp/{book_id}",
    },
    "com": {
        "bookall": "https://www.amazon.com/hz/mycd/myx#/home/content/booksAll",
        "download": _DOWNLOAD_URL,
        "payload": "https://www.amazon.com/hz/mycd/ajax",
        "insights": "https://www.amazon.com/kindle/reading/insights/data",
        "book_url": "https://www.amazon.com/dp/{book_id}",
    },
}

# for kindle stats
# Regex template (format with ``name``) matching a START/END comment pair and
# everything between them: groups are (start marker, body, end marker).
GITHUB_README_COMMENTS = (
    "(<!--START_SECTION:{name}-->\n)(.*)(<!--END_SECTION:{name}-->\n)"
)
MY_KINDLE_STATS_INFO_HEAD = "## My Kindle Stats\n"
MY_KINDLE_STATS_INFO = "- I bought {books_len} books\n- I pushed {pdocs_len} docs\n- My first book is {first_book_title}, bought on {first_book_bought_date}\n- My first doc is {first_doc_title}, pushed on {first_doc_push_date}\n\n"

KINDLE_TABLE_HEAD = "| ID | Title | Authors | Acquired | Read | \n | ---- | ---- | ---- | ---- | ---- |\n"
KINDLE_STAT_TEMPLATE = "| {id} | {title} | {authors} | {acquired} | {read} |\n"
|
||||||
499
kindle_download_helper/kindle.py
Normal file
499
kindle_download_helper/kindle.py
Normal file
@@ -0,0 +1,499 @@
|
|||||||
|
"""
|
||||||
|
Note some download code from: https://github.com/sghctoma/bOOkp
|
||||||
|
Great Thanks
|
||||||
|
"""
|
||||||
|
|
||||||
|
import atexit
|
||||||
|
import html
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import pickle
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import urllib
|
||||||
|
from http.cookies import SimpleCookie
|
||||||
|
|
||||||
|
import requests
|
||||||
|
import urllib3
|
||||||
|
from requests.adapters import HTTPAdapter
|
||||||
|
|
||||||
|
from kindle_download_helper.dedrm import MobiBook, get_pid_list
|
||||||
|
from kindle_download_helper.config import (
|
||||||
|
KINDLE_URLS,
|
||||||
|
DEFAULT_OUT_DIR,
|
||||||
|
DEFAULT_SESSION_FILE,
|
||||||
|
DEFAULT_OUT_DEDRM_DIR,
|
||||||
|
CONTENT_TYPES,
|
||||||
|
KINDLE_STAT_TEMPLATE,
|
||||||
|
)
|
||||||
|
from kindle_download_helper.config import (
|
||||||
|
MY_KINDLE_STATS_INFO_HEAD,
|
||||||
|
KINDLE_HEADER,
|
||||||
|
MY_KINDLE_STATS_INFO,
|
||||||
|
KINDLE_TABLE_HEAD,
|
||||||
|
)
|
||||||
|
from kindle_download_helper.utils import replace_readme_comments
|
||||||
|
|
||||||
|
# browser_cookie3 is optional: without it cookies must be supplied explicitly.
try:
    import browser_cookie3
except ModuleNotFoundError:
    # Keep the name bound so later references fail in a controlled way instead
    # of raising NameError.
    browser_cookie3 = None
    print("not found browser_cookie3 here, you should use --cookie command")

# Errors (e.g. failed downloads) are also recorded in a local log file.
logger = logging.getLogger("kindle")
fh = logging.FileHandler(".error_books.log")
fh.setLevel(logging.ERROR)
logger.addHandler(fh)

# Downloads are requested with verify=False, which would otherwise emit an
# InsecureRequestWarning per request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
|
||||||
|
class Kindle:
    """Session-backed client for listing and downloading Kindle content.

    All endpoints come from ``KINDLE_URLS[domain]``.  The underlying
    ``requests`` session is restored from ``session_file`` when that file
    exists and is written back to it at interpreter exit.
    """

    def __init__(
        self,
        csrf_token=None,
        domain="cn",
        out_dir=DEFAULT_OUT_DIR,
        out_dedrm_dir=DEFAULT_OUT_DEDRM_DIR,
        cut_length=100,
        session_file=DEFAULT_SESSION_FILE,
    ):
        """Create the client.

        Args:
            csrf_token: CSRF token to send with payload requests; fetched
                lazily from the ``bookall`` page when falsy.
            domain: Key into ``KINDLE_URLS`` ("cn", "jp", "de" or "com").
            out_dir: Directory downloaded files are written to.
            out_dedrm_dir: Directory DeDRM output files are written to.
            cut_length: Maximum length of a generated file name.
            session_file: Path of the pickled session dump.
        """
        self.urls = KINDLE_URLS[domain]
        self._csrf_token = csrf_token
        self.total_to_download = 0
        self.out_dir = out_dir
        self.out_dedrm_dir = out_dedrm_dir
        self.dedrm = False
        self.cut_length = cut_length
        # Set when get_all_books stops early because the API refused a page.
        self.not_done = False
        self.session_file = session_file
        self.session = self.make_session()
        self.is_browser_cookie = False
        self.to_resolve_duplicate_names = False
        # asin -> item, filled by get_all_books for items marked READ.
        self.books_info_dict = {}
        self.file_type_list = ["EBOOK", "PDOC"]
        # Filled in by download_books; defined here so it always exists.
        self.device_serial_number = None
        atexit.register(self.dump_session)

    def set_cookie(self, cookiejar):
        """Replace the session's cookies with *cookiejar*.

        Raises:
            Exception: if *cookiejar* is empty or None.
        """
        if not cookiejar:
            raise Exception("Please make sure your amazon cookie is right")
        self.session.cookies.clear()
        self.session.cookies.update(cookiejar)

    def set_cookie_from_string(self, cookie_string):
        """Parse a ``Cookie`` header style string and install it on the session."""
        cj = self._parse_kindle_cookie(cookie_string)
        self.set_cookie(cj)

    def dump_session(self):
        """Pickle the current session to ``self.session_file``."""
        with open(self.session_file, "wb") as f:
            pickle.dump(self.session, f)

    @property
    def csrf_token(self):
        """The CSRF token, fetched from the ``bookall`` page on first use."""
        if not self._csrf_token:
            self._csrf_token = self._get_csrf_token()
        return self._csrf_token

    @csrf_token.setter
    def csrf_token(self, csrf_token):
        self._csrf_token = csrf_token

    def ensure_session_cookie(self):
        """Load cookies from the local browsers when the session has none.

        Best effort: any failure (including browser_cookie3 being unavailable)
        is reported on stdout and otherwise ignored.
        """
        if not self.session.cookies:
            logger.debug("No cookie found, trying to load from browsers")
            try:
                self.set_cookie(browser_cookie3.load(domain_name="amazon"))
            except Exception:
                # Narrowed from a bare ``except`` so KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                print("not found browser_cookie3 here, you should use --cookie command")

    @staticmethod
    def _parse_kindle_cookie(kindle_cookie):
        """Convert a cookie string into a requests cookie jar.

        Returns:
            A cookie jar holding every parsed cookie, or ``None`` when the
            string yields no cookies (``set_cookie`` rejects that).
        """
        cookie = SimpleCookie()
        cookie.load(kindle_cookie)
        cookies_dict = {key: morsel.value for key, morsel in cookie.items()}
        if not cookies_dict:
            return None
        # Build the jar once instead of rebuilding it for every cookie.
        return requests.utils.cookiejar_from_dict(
            cookies_dict, cookiejar=None, overwrite=True
        )

    def _get_csrf_token(self):
        """
        TODO: I do not know why I have to get csrf token in the page not in this way
        maybe figure out why in the future

        Fetches the ``bookall`` page and extracts ``var csrfToken = "...";``.

        Raises:
            Exception: if the page does not contain a token; the cached token
                and the session cookies are reset first.
        """
        r = self.session.get(self.urls["bookall"])
        match = re.search(r'var csrfToken = "(.*)";', r.text)
        if not match:
            self.revoke_cookie_token(open_page=self.is_browser_cookie)
            raise Exception(
                "Can't get the csrf token, "
                f"please refresh the page at {self.urls['bookall']} and retry"
            )
        return match.group(1)

    def refresh_browser_cookie(self):
        """Open the ``bookall`` page in the default browser; errors are ignored."""
        import webbrowser

        try:
            webbrowser.open(self.urls["bookall"])
        except Exception:
            pass

    def revoke_cookie_token(self, open_page=False):
        """Forget the CSRF token and cookies, optionally opening the page."""
        # help user open it directly.
        logger.info(
            "Opening the url to get cookie...You can wait for the page to finish loading and retry"
        )
        self._csrf_token = None  # reset the token
        # clear the cookies so the next time it can be reloaded from the browsers
        self.session.cookies.clear()
        if open_page:
            self.refresh_browser_cookie()

    def ensure_cookie_token(self):
        """Make sure both session cookies and a CSRF token are available."""
        if not self._csrf_token:
            if not self.session.cookies:
                self.refresh_browser_cookie()
                self.ensure_session_cookie()
            self._csrf_token = self._get_csrf_token()
        logger.debug(
            f"session-id: { self.session.cookies.get_dict().get('session-id') }"
        )

    def make_session(self):
        """Return the pickled session from ``session_file`` or a fresh one.

        A fresh session gets ``KINDLE_HEADER`` and an HTTPS adapter configured
        with ``urllib3.Retry(5, backoff_factor=0.5)``.
        """
        if os.path.exists(self.session_file):
            # NOTE(review): unpickling runs arbitrary code from the file; only
            # point session_file at a dump this tool wrote itself.
            with open(self.session_file, "rb") as f:
                session = pickle.load(f)
        else:
            session = requests.Session()
            session.headers.update(KINDLE_HEADER)
            session.mount(
                # will retry 5 times after 0.5, 1.0, 2.0, 4.0, ... seconds for
                # (413, 429, 503) statuses
                "https://",
                HTTPAdapter(max_retries=urllib3.Retry(5, backoff_factor=0.5)),
            )

        logger.debug(f"user-agent: { session.headers.get('User-Agent') }")
        return session

    def _post_payload(self, payload):
        """POST *payload* (JSON-encoded) plus the CSRF token to the payload URL."""
        return self.session.post(
            self.urls["payload"],
            data={"data": json.dumps(payload), "csrfToken": self.csrf_token},
        )

    def get_devices(self):
        """
        This method must be called before each download, so we ensure
        the session cookies before it is called

        Returns:
            The devices from the response that carry a ``deviceSerialNumber``.

        Raises:
            Exception: if the response reports an error or lists no devices.
        """
        self.ensure_cookie_token()

        r = self._post_payload({"param": {"GetDevices": {}}})
        r.raise_for_status()
        result = r.json()
        if result.get("error"):
            self.revoke_cookie_token(open_page=True)
            raise Exception(
                f"Error: {result.get('error')}, please visit {self.urls['bookall']} to revoke the csrftoken and cookie"
            )
        devices = result["GetDevices"]["devices"]
        # sleep get device first time.
        logger.info("Amazon open their bot check will sleep 3s")
        time.sleep(3)
        if not devices:
            raise Exception("No devices are bound to this account")
        return [device for device in devices if "deviceSerialNumber" in device]

    def get_all_books(self, start_index=0, filetype="EBOK"):
        """
        TODO: refactor this function

        Page through the ownership list and return every item.

        Args:
            start_index: 0-based index of the first item to request.
            filetype: "EBOK" or "PDOC" (a key of ``CONTENT_TYPES``).

        Returns:
            The list of item dicts collected before the listing ended or
            failed.  ``self.total_to_download`` and ``self.books_info_dict``
            are updated along the way; ``self.not_done`` is set when paging
            stopped on a failed request after the first page.
        """
        # some info
        if filetype == "PDOC":
            logger.info(
                "It will take some time to get all PDOC books list, please wait"
            )
        startIndex = start_index
        batchSize = 100
        payload = {
            "param": {
                "OwnershipData": {
                    "sortOrder": "DESCENDING",
                    "sortIndex": "DATE",
                    "startIndex": startIndex,
                    "batchSize": batchSize,
                    "contentType": CONTENT_TYPES[filetype],
                    "itemStatus": ["Active"],
                }
            }
        }

        if filetype == "EBOK":
            payload["param"]["OwnershipData"].update(
                {
                    "originType": ["Purchase"],
                }
            )
        else:
            batchSize = 18
            payload["param"]["OwnershipData"].update(
                {
                    "batchSize": batchSize,
                    "isExtendedMYK": False,
                }
            )

        books = []
        ### added by yihong0618 2022.06.27
        ### this ugly code is for amazon open their bot check
        ### if the bot check close
        ### will delete the try and try code
        break_times = 0
        while True:
            # anyway sleep 0.5
            time.sleep(0.5)
            r = self._post_payload(payload)
            # try three times for bot check
            if r.status_code == 503:
                # sleep and try again
                sleep_seconds = 5 + 2 * break_times
                time.sleep(sleep_seconds)
                logger.info(
                    f"Amazon open their bot check will sleep {sleep_seconds}s and try this api again, now index: {startIndex}/{self.total_to_download}"
                )
                if break_times < 7:
                    break_times += 1
                    r = self._post_payload(payload)
            if not r.ok:
                if r.status_code == 503:
                    # Only reachable after the 503 branch above ran, so
                    # sleep_seconds is always bound here.
                    time.sleep(sleep_seconds)
                    logger.info(
                        f"Amazon open their bot check will sleep {sleep_seconds}s last time and try this api again, now index: {startIndex}/{self.total_to_download}"
                    )
                    logger.info("Next time fail will break the loop")
                    r = self._post_payload(payload)
                    break_times += 1
                if not r.ok:
                    # amazon limit this api
                    if startIndex == 0:
                        logger.error(
                            "Amazon api limit when this download done.\n Please run it again`"
                        )
                    else:
                        self.not_done = True
                        logger.error(
                            "Amazon api limit when this download done.\n You can add command `--resume-from %s`",
                            startIndex,
                        )
                    break
            result = r.json()
            if not result.get("success", True):
                logger.error("get all books error: %s", result.get("error"))
                break
            items = result["OwnershipData"]["items"]
            for item in items:
                if filetype == "PDOC":
                    item["title"] = html.unescape(item["title"])
                    # PDOC items use "author"; store it under "authors" like
                    # the ebook items.
                    item["authors"] = html.unescape(item.pop("author", ""))
                if item.get("readStatus", "") == "READ":
                    self.books_info_dict[item["asin"]] = item

            books.extend(items)
            self.total_to_download = result["OwnershipData"]["numberOfItems"]

            if result["OwnershipData"]["hasMoreItems"]:
                startIndex += batchSize
                payload["param"]["OwnershipData"]["startIndex"] = startIndex
            else:
                break
        return books

    def _get_reading_stats(self):
        """Return the JSON body of the ``insights`` endpoint.

        Raises:
            Exception: if the response status is not OK.
        """
        insights_url = self.urls["insights"]
        r = self.session.get(insights_url)
        if r.ok:
            return r.json()
        logger.error(f"Something is wrong get the stats data url: {insights_url}")
        raise Exception(f"Something is wrong get the stats data url: {insights_url}")

    def _make_one_book_stats_info(self, book_info):
        """Build one stats-table row for a read title.

        Args:
            book_info: Entry of the insights ``titles_read`` list; its
                ``asin`` and ``date_read`` values are used.

        Returns:
            ``(title, authors, acquired, read)`` strings, or ``None`` when the
            asin is not in ``self.books_info_dict``.
        """
        book_url = self.urls["book_url"]
        asin = book_info["asin"]
        book = self.books_info_dict.get(asin)
        if not book:
            return
        book_title = book.get("title", "")
        # filter the brackets in the book title
        book_title = re.sub(
            r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title
        )
        book_title = book_title.replace(" ", "")
        if book.get("category", "") == "KindleEBook":
            book_url = book_url.format(book_id=asin)
            book_title = f"[{book_title}]({book_url})"
        # A missing authors value is treated as empty instead of crashing len().
        book_authors = book.get("authors") or ""
        if len(book_authors) > 10:
            book_authors = ",".join(book_authors.split(",")[:2]) + "..."
        # only keep date
        read = (book_info.get("date_read") or "")[:10]
        acquired = (
            book.get("acquiredDate", "")
            .replace("年", "-")
            .replace("月", "-")
            .replace("日", "")
        )
        return book_title, book_authors, acquired, read

    def make_kindle_stats_readme(self):
        """Write the stats summary and read-books table to ``my_kindle_stats.md``.

        The file is created with an empty ``my_kindle`` section when missing;
        the section body is then replaced with the generated markdown.
        """
        ebooks = self.get_all_books(filetype="EBOK")
        pdocs = self.get_all_books(filetype="PDOC")
        first_ebook, first_pdoc = None, None
        reading_stats = self._get_reading_stats()
        # A missing "titles_read" key is treated as an empty list.
        read_list = reading_stats.get("goal_info", {}).get("titles_read") or []
        # Lists are requested in descending date order, so the last entry is
        # the earliest one.
        if pdocs:
            first_pdoc = pdocs[-1]
        # Test the list itself: first_ebook is still None at this point.
        if ebooks:
            first_ebook = ebooks[-1]

        s = MY_KINDLE_STATS_INFO_HEAD
        kindle_stats_str = ""
        if pdocs or ebooks:
            kindle_stats_str = MY_KINDLE_STATS_INFO.format(
                books_len=len(ebooks) if ebooks else 0,
                pdocs_len=len(pdocs) if pdocs else 0,
                first_book_title=first_ebook["title"] if first_ebook else "",
                first_book_bought_date=first_ebook["acquiredDate"]
                if first_ebook
                else "",
                first_doc_title=first_pdoc["title"] if first_pdoc else "",
                first_doc_push_date=first_pdoc["acquiredDate"] if first_pdoc else "",
            )
        s += kindle_stats_str
        s += KINDLE_TABLE_HEAD
        index = 1
        for book_info in read_list:
            # Compute the row once; None means the title is unknown locally.
            row = self._make_one_book_stats_info(book_info)
            if not row:
                continue
            book_title, book_authors, acquired, read = row
            s += KINDLE_STAT_TEMPLATE.format(
                id=str(index),
                title=book_title,
                authors=book_authors,
                acquired=acquired,
                read=read,
            )
            index += 1
        if not os.path.exists("my_kindle_stats.md"):
            with open("my_kindle_stats.md", "a") as f:
                f.write(
                    """<!--START_SECTION:my_kindle-->
<!--END_SECTION:my_kindle-->
"""
                )
        replace_readme_comments("my_kindle_stats.md", s, "my_kindle")

    def download_one_book(self, book, device, index, filetype="EBOK"):
        """Download one item into ``out_dir`` and optionally DeDRM it.

        Failures are logged (title and asin) rather than raised, so one bad
        item does not abort a whole run.

        Args:
            book: Item dict with at least ``title`` and ``asin``.
            device: Device dict with ``deviceSerialNumber``, ``deviceType``
                and ``customerId``.
            index: 0-based position, used only for the progress message.
            filetype: "EBOK" or "PDOC".
        """
        title = book["title"]
        asin = book["asin"]
        try:
            download_url = self.urls["download"].format(
                filetype,
                asin,
                device["deviceSerialNumber"],
                device["deviceType"],
                device["customerId"],
            )
            r = self.session.get(download_url, verify=False, stream=True)
            r.raise_for_status()
            name = re.findall(
                r"filename\*=UTF-8''(.+)", r.headers["Content-Disposition"]
            )[0]
            name = urllib.parse.unquote(name)
            # Only the extension of the server-side name is kept.
            _, extname = os.path.splitext(name)
            name = title + extname
            name = re.sub(r'[\\/:*?"<>|]', "_", name)

            ##### if you have many duplicate name books #####
            if self.to_resolve_duplicate_names:
                name = f"{asin}_{name}"
            if len(name) > self.cut_length:
                # Keep the last five characters so the extension survives.
                name = name[: self.cut_length - 5] + name[-5:]
            # The size is informational only; do not fail the download when
            # the header is absent.
            total_size = r.headers.get("Content-Length", "unknown")

            out = os.path.join(self.out_dir, name)
            out_dedrm = os.path.join(self.out_dedrm_dir, name)
            logger.info(
                f"({index + 1}/{self.total_to_download})downloading {name} {total_size} bytes"
            )
            with open(out, "wb") as f:
                for chunk in r.iter_content(chunk_size=512):
                    f.write(chunk)
            logger.info(f"{name} downloaded")
            # for dedrm
            if self.dedrm:
                try:
                    mb = MobiBook(out)
                    md1, md2 = mb.get_pid_meta_info()
                    # Use the serial of the device that produced the download;
                    # self.device_serial_number is only set by download_books.
                    totalpids = get_pid_list(
                        md1, md2, [device["deviceSerialNumber"]], []
                    )
                    totalpids = list(set(totalpids))
                    mb.make_drm_file(totalpids, out_dedrm)
                except Exception as e:
                    logger.error("Dedrm failed for %s: %s", name, e)
        except Exception as e:
            logger.error(str(e))
            logger.error(f"Title: {title}, Asin: {asin} download failed")

    def download_books(self, start_index=0, filetype="EBOK"):
        """Download every listed item using the account's first device.

        Args:
            start_index: 0-based index to start listing from.
            filetype: "EBOK" or "PDOC".

        Raises:
            Exception: if no usable device is returned.
        """
        # use default device
        devices = self.get_devices()
        if not devices:
            # get_devices filters out entries without a serial number, so the
            # list can be empty even when the account has devices.
            raise Exception("No devices are bound to this account")
        device = devices[0]
        self.device_serial_number = device["deviceSerialNumber"]

        logger.info(
            f"Using default device serial Number: {device['deviceSerialNumber']}"
        )
        books = self.get_all_books(filetype=filetype, start_index=start_index)
        if start_index > 0:
            print(f"resuming the download {start_index + 1}/{self.total_to_download}")
        index = start_index
        for book in books:
            self.download_one_book(book, device, index, filetype)
            index += 1
        if self.not_done:
            logger.error(
                f"\n\nNot All done!\nAmazon api limit when this download done.\n You can add command `--resume-from {index}` to resume download next time"
            )
        else:
            if not self.dedrm:
                logger.info(
                    "\n\nAll done!\nNow you can use apprenticeharper's DeDRM tools "
                    "(https://github.com/apprenticeharper/DeDRM_tools)\n"
                    "with the following serial number to remove DRM: "
                    + device["deviceSerialNumber"]
                )
            else:
                # Report the directories actually in use, not the defaults.
                logger.info(
                    f"All done books saved in `{self.out_dir}`, dedrm files saved in `{self.out_dedrm_dir}`"
                )
            # Keep the serial number next to the downloads for later DeDRM use.
            with open(os.path.join(self.out_dir, "key.txt"), "w") as f:
                f.write(f"Key is: {device['deviceSerialNumber']}")
|
||||||
17
kindle_download_helper/utils.py
Normal file
17
kindle_download_helper/utils.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
import re
|
||||||
|
from kindle_download_helper.config import GITHUB_README_COMMENTS
|
||||||
|
|
||||||
|
|
||||||
|
def replace_readme_comments(file_name, comment_str, comments_name):
    """Replace the body of a START/END comment section in a file, in place.

    Args:
        file_name: Path of the UTF-8 text file to rewrite.
        comment_str: Text to place between the two section markers.
        comments_name: Section name used in the
            ``<!--START_SECTION:name-->`` / ``<!--END_SECTION:name-->`` markers.
    """
    pattern = GITHUB_README_COMMENTS.format(name=comments_name)

    def _splice(match):
        # A callable replacement inserts comment_str literally.  Passing it
        # inside a replacement template would make re.sub interpret any
        # backslash in it (e.g. in a book title) as an escape or group
        # reference.
        return f"{match.group(1)}{comment_str}\n{match.group(3)}"

    with open(file_name, "r+", encoding="UTF-8") as f:
        text = f.read()
        # regex sub from github readme comments
        text = re.sub(pattern, _splice, text, flags=re.DOTALL)
        # Rewrite from the start and drop any leftover tail of the old text.
        f.seek(0)
        f.write(text)
        f.truncate()
|
||||||
@@ -1,56 +1,3 @@
|
|||||||
|
|
||||||
<!--START_SECTION:my_kindle-->
|
<!--START_SECTION:my_kindle-->
|
||||||
## My kindle stats
|
|
||||||
- I bought 38 books
|
|
||||||
- I pushed 871 docks
|
|
||||||
- My first book is 知乎周刊·商业的细节, bought on 2013年9月16日
|
|
||||||
- My first doc is 天涯头条:《刘军宁:文明社会与言论自由,事关社会安定和对权力的制约》, bought on 2015年3月16日
|
|
||||||
|
|
||||||
| ID | Title | Authors | Acquired | Read |
|
|
||||||
| ---- | ---- | ---- | ---- | ---- |
|
|
||||||
| 1 | 其主之声 | 斯坦尼斯瓦夫·莱姆 | 2022-5-3 | 2022-05-03 |
|
|
||||||
| 2 | 奇鸟行状录 | 村上春树 | 2022-3-22 | 2022-03-22 |
|
|
||||||
| 3 | Origin原型機-第03卷 | Boichi | 2021-6-1 | 2022-02-11 |
|
|
||||||
| 4 | Origin原型機-第05卷 | Boichi | 2021-6-1 | 2022-02-11 |
|
|
||||||
| 5 | Origin原型機-第07卷 | Boichi | 2021-6-1 | 2022-02-11 |
|
|
||||||
| 6 | 第一人称单数 | 村上春树 | 2021-11-28 | 2022-02-05 |
|
|
||||||
| 7 | 挽救计划 | 安迪·威尔 | 2021-11-18 | 2021-11-18 |
|
|
||||||
| 8 | [日本名家小说集](https://www.amazon.cn/dp/B08P8KXYZ1) | 东野圭吾, 伊坂幸太郎... | 2021-5-1 | 2021-11-17 |
|
|
||||||
| 9 | 两京十五日 | 马伯庸 | 2021-10-3 | 2021-10-05 |
|
|
||||||
| 10 | 绝叫 | 叶真中显 | 2021-8-17 | 2021-08-22 |
|
|
||||||
| 11 | 炎拳-第02卷 | 105965398155@vol.moe... | 2020-11-8 | 2021-07-24 |
|
|
||||||
| 12 | Origin原型機-第02卷 | Boichi | 2021-6-1 | 2021-06-04 |
|
|
||||||
| 13 | Origin原型機-第01卷 | Boichi | 2021-6-1 | 2021-06-03 |
|
|
||||||
| 14 | 殺手寓言-第15卷 | 南勝久 | 2021-4-9 | 2021-04-10 |
|
|
||||||
| 15 | 殺手寓言-第14卷 | 南勝久 | 2021-4-6 | 2021-04-09 |
|
|
||||||
| 16 | 殺手寓言-第13卷 | 南勝久 | 2021-4-6 | 2021-04-09 |
|
|
||||||
| 17 | 殺手寓言-第12卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
|
|
||||||
| 18 | 殺手寓言-第11卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
|
|
||||||
| 19 | 殺手寓言-第10卷 | 南勝久 | 2021-4-6 | 2021-04-08 |
|
|
||||||
| 20 | 殺手寓言-第09卷 | 南勝久 | 2021-4-6 | 2021-04-07 |
|
|
||||||
| 21 | 殺手寓言-第08卷 | 南勝久 | 2021-4-6 | 2021-04-07 |
|
|
||||||
| 22 | 殺手寓言-第07卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
|
|
||||||
| 23 | 殺手寓言-第06卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
|
|
||||||
| 24 | 殺手寓言-第05卷 | 南勝久 | 2021-4-4 | 2021-04-05 |
|
|
||||||
| 25 | 殺手寓言-第04卷 | 南勝久 | 2021-4-4 | 2021-04-04 |
|
|
||||||
| 26 | 殺手寓言-第03卷 | 南勝久 | 2021-3-31 | 2021-04-03 |
|
|
||||||
| 27 | 殺手寓言-第02卷 | 南勝久 | 2021-3-31 | 2021-04-02 |
|
|
||||||
| 28 | 殺手寓言-第01卷 | 南勝久 | 2021-3-31 | 2021-04-01 |
|
|
||||||
| 29 | 夏日重現-第12卷 | 田中靖規 | 2021-2-19 | 2021-03-06 |
|
|
||||||
| 30 | 夏日重現-第11卷 | 田中靖規 | 2021-2-19 | 2021-03-05 |
|
|
||||||
| 31 | 夏日重現-第10卷 | 田中靖規 | 2021-2-19 | 2021-03-04 |
|
|
||||||
| 32 | 夏日重現-第09卷 | 田中靖規 | 2021-2-19 | 2021-03-04 |
|
|
||||||
| 33 | 夏日重現-第08卷 | 田中靖規 | 2021-2-19 | 2021-02-28 |
|
|
||||||
| 34 | 夏日重現-第07卷 | 田中靖規 | 2021-2-19 | 2021-02-28 |
|
|
||||||
| 35 | 夏日重現-第06卷 | 田中靖規 | 2021-2-19 | 2021-02-26 |
|
|
||||||
| 36 | 夏日重現-第05卷 | 田中靖規 | 2021-2-19 | 2021-02-23 |
|
|
||||||
| 37 | 夏日重現-第04卷 | 田中靖規 | 2021-2-19 | 2021-02-21 |
|
|
||||||
| 38 | 夏日重現-第03卷 | 田中靖規 | 2021-2-19 | 2021-02-21 |
|
|
||||||
| 39 | 夏日重現-第02卷 | 田中靖規 | 2021-2-19 | 2021-02-19 |
|
|
||||||
| 40 | 夏日重現-第01卷 | 田中靖規 | 2021-2-19 | 2021-02-19 |
|
|
||||||
| 41 | 1984 | George Orwell... | 2015-7-1 | 2021-02-18 |
|
|
||||||
| 42 | 炎拳-第01卷 | 105965398155@vol.moe... | 2020-11-8 | 2020-11-10 |
|
|
||||||
| 43 | 1984 | George Orwell... | 2015-7-1 | 2020-04-25 |
|
|
||||||
| 44 | 锦衣之下 | 蓝色狮 | 2020-1-7 | 2020-01-27 |
|
|
||||||
|
|
||||||
<!--END_SECTION:my_kindle-->
|
<!--END_SECTION:my_kindle-->
|
||||||
|
|
||||||
23
setup.py
Normal file
23
setup.py
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
from setuptools import find_packages, setup
|
||||||
|
|
||||||
|
setup(
|
||||||
|
name="kindle_download",
|
||||||
|
author="yihong0618",
|
||||||
|
author_email="zouzou0208@gmail.com",
|
||||||
|
url="https://github.com/yihong0618/kindle_download_helper",
|
||||||
|
license="GPL V3",
|
||||||
|
version="1.1.1",
|
||||||
|
description="Download all your kindle books and `DeDRM` script.",
|
||||||
|
long_description="Download all your kindle books and `DeDRM` script.",
|
||||||
|
packages=find_packages(),
|
||||||
|
include_package_data=True,
|
||||||
|
install_requires=[
|
||||||
|
"requests",
|
||||||
|
"browser-cookie3",
|
||||||
|
"faker",
|
||||||
|
"pywin32 ; sys_platform == 'win32'"
|
||||||
|
],
|
||||||
|
entry_points={
|
||||||
|
"console_scripts": ["kindle_download = kindle_download_helper.cli:main"],
|
||||||
|
},
|
||||||
|
)
|
||||||
@@ -300,6 +300,6 @@ class Ui_MainDialog(object):
|
|||||||
self.label_6.setText(QCoreApplication.translate("MainDialog", u"\u9690\u79c1\u58f0\u660e\uff1a\u6211\u4eec\u4e0d\u4f1a\u6536\u96c6\u4efb\u4f55\u7528\u6237\u4fe1\u606f\uff0c\u8bf7\u653e\u5fc3\u4f7f\u7528", None))
|
self.label_6.setText(QCoreApplication.translate("MainDialog", u"\u9690\u79c1\u58f0\u660e\uff1a\u6211\u4eec\u4e0d\u4f1a\u6536\u96c6\u4efb\u4f55\u7528\u6237\u4fe1\u606f\uff0c\u8bf7\u653e\u5fc3\u4f7f\u7528", None))
|
||||||
self.label_3.setText(QCoreApplication.translate("MainDialog", u"Copyright 2022 \u00a9 [yihong0618](https://github.com/yihong0618) and [frostming](https://github.com/frostming)", None))
|
self.label_3.setText(QCoreApplication.translate("MainDialog", u"Copyright 2022 \u00a9 [yihong0618](https://github.com/yihong0618) and [frostming](https://github.com/frostming)", None))
|
||||||
self.label_4.setText(QCoreApplication.translate("MainDialog", u"GitHub: <https://github.com/yihong0618/Kindle_download_helper>", None))
|
self.label_4.setText(QCoreApplication.translate("MainDialog", u"GitHub: <https://github.com/yihong0618/Kindle_download_helper>", None))
|
||||||
self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: MIT", None))
|
self.label_5.setText(QCoreApplication.translate("MainDialog", u"License: GPL V3", None))
|
||||||
# retranslateUi
|
# retranslateUi
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user