From ed1b607ffb53a425a5118a6670720e9ac8fca85e Mon Sep 17 00:00:00 2001 From: yihong0618 Date: Mon, 5 Feb 2024 23:27:20 +0800 Subject: [PATCH] fix: #153 support bookmark Signed-off-by: yihong0618 --- .gitignore | 4 +- README.md | 4 + kindle_download_helper/kindle.py | 6 +- kindle_download_helper/no_cli.py | 10 ++ kindle_download_helper/no_kindle.py | 133 +++++++++++++++--- .../third_party/kfxlib/epub_output.py | 14 +- .../third_party/kfxlib/ion_symbol_table.py | 6 +- .../third_party/kfxlib/jxr_container.py | 6 +- .../third_party/kfxlib/jxr_image.py | 6 +- .../third_party/kfxlib/kfx_container.py | 6 +- .../third_party/kfxlib/kpf_container.py | 6 +- .../kfxlib/original_source_epub.py | 6 +- .../third_party/kfxlib/utilities.py | 8 +- .../third_party/kfxlib/yj_container.py | 8 +- .../third_party/kfxlib/yj_metadata.py | 28 ++-- .../kfxlib/yj_position_location.py | 32 +++-- .../third_party/kfxlib/yj_structure.py | 6 +- .../kfxlib/yj_to_epub_navigation.py | 6 +- .../kfxlib/yj_to_epub_properties.py | 6 +- 19 files changed, 217 insertions(+), 84 deletions(-) diff --git a/.gitignore b/.gitignore index b10a249..5261110 100644 --- a/.gitignore +++ b/.gitignore @@ -140,4 +140,6 @@ DOWNLOADS/ .tokens .tokens.com *.csv -my_kindle_stats.md \ No newline at end of file +my_kindle_stats.md +pdocs_bookmark.json +ebooks_bookmark.json \ No newline at end of file diff --git a/README.md b/README.md index f38357c..e247d8c 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,10 @@ python no_kindle.py -e ${email} -p ${password} # 你可以生成所有你电子书的购买记录,笔记记录来分析展示 python no_kindle.py -e ${email} -p ${password} --memory + +# 支持导出全部标记书签及阅读信息(Clipping 信息)#153 +python no_kindle.py -e ${email} -p ${password} --bookmark + ``` diff --git a/kindle_download_helper/kindle.py b/kindle_download_helper/kindle.py index d5fb9a8..99194af 100644 --- a/kindle_download_helper/kindle.py +++ b/kindle_download_helper/kindle.py @@ -408,9 +408,9 @@ class Kindle: books_len=len(ebooks) if ebooks else 0, pdocs_len=len(pdocs) if pdocs else 0, first_book_title=first_ebook["title"] if first_ebook else "", - first_book_bought_date=first_ebook["acquiredDate"] - if first_ebook - else "", + first_book_bought_date=( + first_ebook["acquiredDate"] if first_ebook else "" + ), first_doc_title=first_pdoc["title"] if first_pdoc else "", first_doc_push_date=first_pdoc["acquiredDate"] if first_pdoc else "", ) diff --git a/kindle_download_helper/no_cli.py b/kindle_download_helper/no_cli.py index f3a77c3..1e60be4 100644 --- a/kindle_download_helper/no_cli.py +++ b/kindle_download_helper/no_cli.py @@ -90,6 +90,12 @@ def no_main(): action="store_true", help="Generate your kindle memory to md and csv files", ) + parser.add_argument( + "--bookmark", + dest="bookmark", + action="store_true", + help="Generate your kindle bookmark to md and json files", + ) parser.add_argument( "--only-price", dest="only_price", @@ -128,6 +134,10 @@ def no_main(): nk.make_ebook_memory() return + if options.bookmark: + nk.make_all_bookmark() + return + # download books part if options.pdoc: nk.download_all_pdocs() diff --git a/kindle_download_helper/no_kindle.py b/kindle_download_helper/no_kindle.py index 22f446f..c64c39f 100644 --- a/kindle_download_helper/no_kindle.py +++ b/kindle_download_helper/no_kindle.py @@ -187,7 +187,9 @@ class NoKindle: else: book_title = i["title"] book_title = re.sub( - r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", "", book_title + r"(\([^)]*\))|(\([^)]*\))|(\【[^)]*\】)|(\[[^)]*\])|(\s)", + "", + book_title, ) book_title = book_title.replace(" ", "") is_pdoc = i.get("origins") is None @@ -247,17 +249,11 @@ class NoKindle: tokens=self.tokens, ) ) - print(r.json()) + return r.json() except: return None - def make_all_pdoc_info(self): - for asin, v in self.pdoc_library_dict.items(): - print(asin, v) - self.pdoc_bookmark(asin) - def make_all_ebook_info(self): - # TODO pdoc self.highlight_index = 0 for asin, v in self.ebook_library_dict.items(): self.highlight_index += 1 @@ -274,7 +270,7 @@ class NoKindle: for r in manifest["resources"]: if r["type"] == "KINDLE_USER_ANOT": url = r["endpoint"]["url"] - book_mark_info = self.sidecar_bookmark(url) + book_mark_info = self.ebook_bookmark(url) if not book_mark_info: continue records = book_mark_info["payload"]["records"] @@ -376,7 +372,7 @@ class NoKindle: print(f"Order error to error list {order_id}") self.error_price_list.append(v) - def sidecar_bookmark(self, sidecar_url): + def ebook_bookmark(self, sidecar_url): r = self.session.send( amazon_api.signed_request( "GET", @@ -407,17 +403,43 @@ class NoKindle: ) ) try: - resources = manifest_resp.json()["resources"] + resources_data = manifest_resp.json() + if resources_data.get("resources") is None: + print(f"wrong resource for asin {asin} error: {resources_data}") + data = self._list_book_consumptions(asin) + devices_ids_string = ",".join( + [ + i["deviceAccountId"] + for i in data["ListConsumptionsResponse"]["result"]["entry"][ + "value" + ]["entry"]["value"]["member"] + ] + ) + print(devices_ids_string) + self._remove_book_consumptions(asin, devices_ids_string) + # do it again + manifest_resp = self.session.send( + amazon_api.signed_request( + "GET", + API_MANIFEST_URL + asin.upper(), + asin=asin, + tokens=self.tokens, + request_type="manifest", + ) + ) + resources_data = manifest_resp.json() + resources = resources_data["resources"] + else: + resources = resources_data["resources"] except Exception as e: - print(manifest_resp.json(), str(e)) + print(resources_data, str(e)) return None, False, str(e) - manifest = manifest_resp.json() # azw3 is not so hard drm_voucher_list = [ resource for resource in resources if resource["type"] == "DRM_VOUCHER" ] if not drm_voucher_list: - return manifest, False, "Succeed" + return resources_data, False, "Succeed" drm_voucher = drm_voucher_list[0] try: @@ -427,13 +449,57 @@ class NoKindle: except: print("Could not decrypt the drm voucher!") - manifest["responseContext"] = self._b64ion_to_dict(manifest["responseContext"]) - for resource in manifest["resources"]: + resources_data["responseContext"] = self._b64ion_to_dict( + resources_data["responseContext"] + ) + for resource in resources_data["resources"]: if "responseContext" in resource: resource["responseContext"] = self._b64ion_to_dict( resource["responseContext"] ) - return manifest, True, "Succeed" + return resources_data, True, "Succeed" + + def _list_book_consumptions(self, asin): + url = f"https://prod.us-east-1.library-relay.kindle.amazon.dev/list-consumptions?contentInput=%5B%7B%22id%22%3A%22{asin}%22%2C%22type%22%3A%22EBook%22%2C%22pid%22%3A%22%22%7D%5D" + + r = requests.get( + url, + headers={ + "User-Agent": random.choice(USER_AGENTS), + "Authorization": f"Bearer {self.tokens['access_token']}", + "client": "KindleForiOS", + }, + ) + try: + print(xmltodict.parse(r.text)) + return xmltodict.parse(r.text) + except Exception as e: + print(e) + return None + + def _remove_book_consumptions(self, asin, devices_id_string): + headers = { + "Authorization": f"Bearer {self.tokens['access_token']}", + "Upload-Incomplete": "?0", + "Upload-Draft-Interop-Version": "3", + "client": "KindleForiOS", + } + + json_data = { + "id": asin, + "type": "EBook", + "pid": "", + "deviceAccountIds": devices_id_string, + } + + try: + requests.post( + "https://prod.us-east-1.library-relay.kindle.amazon.dev/remove-consumptions", + headers=headers, + json=json_data, + ) + except Exception as e: + print(f"Something is wrong for delete devices for {asin} error: {str(e)}") def download_book(self, asin, error=None): manifest, is_kfx, info = self.get_book(asin) @@ -717,6 +783,39 @@ class NoKindle: writer.writerow(row) print("File: my_kindle_stats.csv and my_kindle_stats.md have been generated") + def make_all_bookmark(self): + """ + this include both ebooks and pdocs + """ + amazon_api.refresh(self.tokens) + # make all ebooks bookmark + ebook_bookmark_dict_list = [] + pdoc_bookmarl_dict_list = [] + for asin, value in self.ebook_library_dict.items(): + manifest, _, info = self.get_book(asin) + if not manifest: + continue + for r in manifest["resources"]: + if r["type"] == "KINDLE_USER_ANOT": + url = r["endpoint"]["url"] + book_mark_info = self.ebook_bookmark(url) + if book_mark_info: + value.update(book_mark_info) + print(value) + ebook_bookmark_dict_list.append(value) + with open("ebooks_bookmark.json", "w", encoding="utf8") as f: + json.dump(ebook_bookmark_dict_list, f, indent=4, ensure_ascii=False) + + # make all pdoc bookmark + for asin, value in self.pdoc_library_dict.items(): + pdoc_bookmark = self.pdoc_bookmark(asin) + if pdoc_bookmark: + value.update(pdoc_bookmark) + print(value) + pdoc_bookmarl_dict_list.append(value) + with open("pdocs_bookmark.json", "w", encoding="utf8") as f: + json.dump(pdoc_bookmarl_dict_list, f, indent=4, ensure_ascii=False) + if __name__ == "__main__": kindle = NoKindle() diff --git a/kindle_download_helper/third_party/kfxlib/epub_output.py b/kindle_download_helper/third_party/kfxlib/epub_output.py index 9a23c94..e35f6f3 100644 --- a/kindle_download_helper/third_party/kfxlib/epub_output.py +++ b/kindle_download_helper/third_party/kfxlib/epub_output.py @@ -353,9 +353,9 @@ class EPUB_Output(object): def set_book_type(self, book_type): self.book_type = book_type - self.is_children = ( - self.is_comic - ) = self.is_magazine = self.is_print_replica = False + self.is_children = self.is_comic = self.is_magazine = self.is_print_replica = ( + False + ) if self.book_type is None: pass @@ -1125,9 +1125,11 @@ class EPUB_Output(object): if not self.generate_epub2: add_metadata_meta_property( prefix("rendition:orientation"), - self.orientation_lock - if self.orientation_lock != "none" - else "auto", + ( + self.orientation_lock + if self.orientation_lock != "none" + else "auto" + ), ) add_metadata_meta_name_content("orientation-lock", self.orientation_lock) diff --git a/kindle_download_helper/third_party/kfxlib/ion_symbol_table.py b/kindle_download_helper/third_party/kfxlib/ion_symbol_table.py index c1c2221..c822f46 100644 --- a/kindle_download_helper/third_party/kfxlib/ion_symbol_table.py +++ b/kindle_download_helper/third_party/kfxlib/ion_symbol_table.py @@ -52,9 +52,9 @@ class SymbolTableCatalog(object): or shared_symbol_table.version >= self.shared_symbol_tables[(shared_symbol_table.name, None)].version ): - self.shared_symbol_tables[ - (shared_symbol_table.name, None) - ] = shared_symbol_table + self.shared_symbol_tables[(shared_symbol_table.name, None)] = ( + shared_symbol_table + ) def create_shared_symbol_table(self, symbol_table_data): self.add_shared_symbol_table( diff --git a/kindle_download_helper/third_party/kfxlib/jxr_container.py b/kindle_download_helper/third_party/kfxlib/jxr_container.py index ddef8f4..983a7a2 100644 --- a/kindle_download_helper/third_party/kfxlib/jxr_container.py +++ b/kindle_download_helper/third_party/kfxlib/jxr_container.py @@ -69,9 +69,9 @@ class JXRContainer(object): header.extract(ifd_offset - header.offset) pixel_format = "" - self.image_width = ( - self.image_height - ) = image_offset = image_byte_count = self.image_data = None + self.image_width = self.image_height = image_offset = image_byte_count = ( + self.image_data + ) = None num_entries = header.unpack("