diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..c4037f8 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "markdown.extension.toc.levels": "1..3" +} \ No newline at end of file diff --git a/Makefile b/Makefile index 5d9e5f0..998062a 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ BASE_IMAGE_TAG=3.12-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) # github actions will use this to create a tag -IMAGE_TAG=1.0.0 +IMAGE_TAG=1.0.1 DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump diff --git a/README.md b/README.md index 874f691..b204885 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,21 @@ # bookstack-file-exporter Table of Contents -- [Background](#background) -- [Using This Application](#using-this-application) +- [bookstack-file-exporter](#bookstack-file-exporter) + - [Background](#background) + - [Features](#features) + - [Use Case](#use-case) + - [Using This Application](#using-this-application) - [Run via Pip](#run-via-pip) - - [Run via Docker](#run-via-docker) -- [Authentication](#authentication) -- [Configuration](#configuration) - - [Simple example](#just-run) - - [Full example](#full-example) - - [Options and descriptions](#options-and-descriptions) - - [Environment variables](#valid-environment-variables) -- [Backup Behavior](#backup-behavior) + - [Run Via Docker](#run-via-docker) + - [Authentication](#authentication) + - [Configuration](#configuration) + - [Backup Behavior](#backup-behavior) + - [General](#general) - [Images](#images) - [Modify Markdown Files](#modify-markdown-files) -- [Object Storage](#object-storage) - - [Minio](#minio-backups) -- [Future Items](#future-items) + - [Object Storage](#object-storage) + - [Minio Backups](#minio-backups) + - [Future Items](#future-items) ## Background _Features are actively being developed. See `Future Items` section for more details. 
Open an issue for a feature request._ @@ -122,6 +122,7 @@ Docker can be utilized to run the exporter. #### Examples ```bash +# --user flag to override the uid/gid for created files. Set this to your uid/gid docker run \ --user ${USER_ID}:${USER_GID} \ -v $(pwd)/config.yml:/export/config/config.yml:ro \ @@ -144,7 +145,7 @@ Tokens and other options can be specified, example: ```bash # '-e' flag for env vars -# --user flag to override the uid/gid for created files +# --user flag to override the uid/gid for created files. Set this to your uid/gid docker run \ -e LOG_LEVEL='debug' \ -e BOOKSTACK_TOKEN_ID='xyz' \ @@ -190,11 +191,11 @@ host: "https://bookstack.yourdomain.com" credentials: token_id: "" token_secret: "" -formats: +formats: # md only example - markdown -- html -- pdf -- plaintext +# - html +# - pdf +# - plaintext output_path: "bkps/" assets: export_images: false @@ -204,7 +205,7 @@ assets: ``` #### Full Example -Below is an example configuration that shows all possible options, +Below is an example configuration that shows example values for all possible options. ```yaml host: "https://bookstack.yourdomain.com" @@ -271,10 +272,9 @@ General ## Backup Behavior -### Export File +### General Backups are exported in `.tgz` format and generated based off timestamp. Export names will be in the format: `%Y-%m-%d_%H-%M-%S` (Year-Month-Day_Hour-Minute-Second). *Files are first pulled locally to create the tarball and then can be sent to object storage if needed*. Example file name: `bookstack_export_2023-09-22_07-19-54.tgz`. -### General The exporter can also do housekeeping duties and keep a configured number of archives and delete older ones. See `keep_last` property in the [Configuration](#options-and-descriptions) section. Object storage provider configurations include their own `keep_last` property for flexibility. 
For file names, `slug` names (from Bookstack API) are used, as such certain characters like `!`, `/` will be ignored and spaces replaced from page names/titles. @@ -287,38 +287,42 @@ Shelves --> Books --> Chapters --> Pages kafka (shelf) ---> controller (book) ---> settings (chapter) - ---> retention-settings (page) - ---> retention-settings.md - ---> retention-settings_meta.json - ---> compression (page) - ---> compression.html - ---> compression.pdf - ---> compression_meta.json - ---> optional-config (page) + ---> retention-settings.md (page) + ---> retention-settings_meta.json + ... + ---> compression.html (page) + ---> compression.pdf + ---> compression_meta.json + ... + ---> optional-config.md (page) ... - ---> main (page) + ---> main.md (page) ... ---> broker (book) - ---> settings (page) + ---> settings.md (page) ... - ---> deploy (page) + ---> deploy.md (page) ... kafka-apps (shelf) ---> schema-registry (book) - ---> protobuf (page) + ---> protobuf.md (page) ... - ---> settings (page) + ---> settings.md (page) ... ## Example with image layout -unassigned (Used for books with no shelf) +# unassigned dir is used for books with no shelf +unassigned (shelf) ---> test (book) - ---> test_page (page) - ---> test_page.md - ---> test_page.pdf - ---> images (image_dir) + ---> images (image_dir) + ---> test_page (page directory) ---> img-001.png ---> img-002.png + ---> rec-page + ---> img-010.png + ---> img-020.png + ---> test_page.md (page) + ... 
 ---> rec_page (page) ---> rec_page.md ---> rec_page.pdf @@ -330,20 +334,15 @@ Another example is shown below: # book = react # basics = page -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.md -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.html -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.pdf -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics.txt -bookstack_export_2023-11-20_08-00-29/programming/react/basics/basics_meta.json -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/YKvimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/dwwimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/NzZimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/Mymimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.md -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.html -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.pdf -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs.txt -bookstack_export_2023-11-20_08-00-29/programming/react/nextjs/nextjs_meta.json +bookstack_export_2023-11-28_06-24-25/programming/react/basics.md +bookstack_export_2023-11-28_06-24-25/programming/react/basics.pdf +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/YKvimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/dwwimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/NzZimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/next1.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png +bookstack_export_2023-11-28_06-24-25/programming/react/nextjs.md +bookstack_export_2023-11-28_06-24-25/programming/react/nextjs.pdf ``` Books without a shelf will be put in a shelf folder named `unassigned`. 
@@ -362,14 +361,13 @@ You may notice some directories (books) and/or files (pages) in the archive have ### Images -### General -Images will be dumped in a separate directory, `images` within the page directory it belongs to. As shown earlier: +Images will be dumped in a separate directory, `images` within the page parent (book/chapter) directory it belongs to. The relative path will be `{parent}/images/{page}/{image_name}`. As shown earlier: ``` -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/YKvimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/dwwimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/NzZimage.png -bookstack_export_2023-11-20_08-00-29/programming/react/basics/images/Mymimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/dwwimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/basics/NzZimage.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/next1.png +bookstack_export_2023-11-28_06-24-25/programming/react/images/nextjs/tips.png ``` **Note you may see old images in your exports. This is because, by default, Bookstack retains images/drawings that are uploaded even if no longer referenced on an active page. 
Admins can run `Cleanup Images` in the Maintenance Settings or via [CLI](https://www.bookstackapp.com/docs/admin/commands/#cleanup-unused-images) to remove them.** @@ -385,7 +383,7 @@ Page (parent) -> Images (Children) relationships are created and then each image [![pool-topology-1.png](https://demo.bookstack/uploads/images/gallery/2023-07/scaled-1680-/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) ## after -[![pool-topology-1.png](./images/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) +[![pool-topology-1.png](./images/{page_name}/pool-topology-1.png)](https://demo.bookstack/uploads/images/gallery/2023-07/pool-topology-1.png) ``` This allows the image to be found locally within the export files and allows your `markdown` docs to have all the images display properly like they normally would. diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 329343c..c6d01c5 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -47,7 +47,7 @@ def get_bookstack_exports(self, page_nodes: Dict[int, Node]): if page.id_ in all_image_meta: page_image_meta = all_image_meta[page.id_] self._get_page_files(page, page_image_meta) - self._get_page_images(page.file_path, page_image_meta) + self._get_page_images(page, page_image_meta) def _get_page_files(self, page_node: Node, image_meta: List[ImageNode]): """pull all bookstack pages into local files/tar""" @@ -60,12 +60,13 @@ def _get_page_image_map(self) -> Dict[int, ImageNode]: return {} return self._page_archiver.get_image_meta() - def _get_page_images(self, page_path: str, img_nodes: List[ImageNode]): + def _get_page_images(self, page_node: Node, img_nodes: List[ImageNode]): if not img_nodes: log.debug("page has no images to pull") return log.debug("Exporting bookstack page images") - 
self._page_archiver.archive_page_images(page_path, img_nodes) + self._page_archiver.archive_page_images(page_node.parent.file_path, + page_node.name, img_nodes) def create_archive(self): """create tgz archive""" diff --git a/bookstack_file_exporter/archiver/page_archiver.py b/bookstack_file_exporter/archiver/page_archiver.py index e159bbb..9c0b07a 100644 --- a/bookstack_file_exporter/archiver/page_archiver.py +++ b/bookstack_file_exporter/archiver/page_archiver.py @@ -43,15 +43,14 @@ def __init__(self, img_meta_data: Dict[str, Union[int, str]]): self.url: str = img_meta_data['url'] self.name: str = self._get_image_name() self._markdown_str = "" - self._image_relative_path: str = f"./{_IMAGE_DIR_NAME}/{self.name}" + self._relative_path_prefix: str = f"./{_IMAGE_DIR_NAME}" def _get_image_name(self) -> str: return self.url.split('/')[-1] - @property - def image_relative_path(self): + def get_image_relative_path(self, page_name: str) -> str: """return image path local to page directory""" - return self._image_relative_path + return f"{self._relative_path_prefix}/{page_name}/{self.name}" @property def markdown_str(self): @@ -92,7 +91,7 @@ def __init__(self, archive_dir: str, config: ConfigNode) -> None: self.export_formats = config.user_inputs.formats self.api_urls = config.urls self._headers = config.headers - # parent export directory, bookstack-, and .tgz extension + # full path, bookstack-, and .tgz extension self.archive_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tgz']}" # name of intermediate tar file before gzip self.tar_file = f"{archive_dir}{_FILE_EXTENSION_MAP['tar']}" @@ -114,14 +113,14 @@ def archive_page(self, page: Node, self._archive_page(page, export_format, page_data, image_urls) if self.asset_config.export_meta: - self._archive_page_meta(page.name, page.file_path, page.meta) + self._archive_page_meta(page.file_path, page.meta) def _archive_page(self, page: Node, export_format: str, data: bytes, image_nodes: List[ImageNode] = None): page_file_name = 
f"{self.archive_base_path}/" \ - f"{page.file_path}/{page.name}{_FILE_EXTENSION_MAP[export_format]}" + f"{page.file_path}{_FILE_EXTENSION_MAP[export_format]}" if self.modify_md and export_format == _MARKDOWN_STR_CHECK and image_nodes: - data = self._update_image_links(data, image_nodes) + data = self._update_image_links(page.name, data, image_nodes) self.write_data(page_file_name, data) def _get_page_data(self, page_id: int, export_format: str): @@ -129,10 +128,8 @@ def _get_page_data(self, page_id: int, export_format: str): return archiver_util.get_byte_response(url=url, headers=self._headers, verify_ssl=self.verify_ssl) - def _archive_page_meta(self, page_name: str, page_path: str, - meta_data: Dict[str, Union[str, int]]): - meta_file_name = f"{self.archive_base_path}/{page_path}/" \ - f"{page_name}{_FILE_EXTENSION_MAP['meta']}" + def _archive_page_meta(self, page_path: str, meta_data: Dict[str, Union[str, int]]): + meta_file_name = f"{self.archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" bytes_meta = archiver_util.get_json_bytes(meta_data) self.write_data(file_path=meta_file_name, data=bytes_meta) @@ -145,14 +142,14 @@ def get_image_meta(self) -> Dict[int, List[ImageNode]]: img_meta_json = img_meta_response.json()['data'] return self._create_image_map(img_meta_json) - def archive_page_images(self, page_path: str, image_nodes: List[ImageNode]): + def archive_page_images(self, parent_path: str, page_name: str, + image_nodes: List[ImageNode]): """pull images locally into a directory based on page""" - # image_base_path = f"{self.archive_base_path}/{page_path}{_IMAGE_DIR_SUFFIX}" - image_base_path = f"{self.archive_base_path}/{page_path}/{_IMAGE_DIR_NAME}" + image_base_path = f"{self.archive_base_path}/{parent_path}/{_IMAGE_DIR_NAME}" for img_node in image_nodes: img_data: bytes = archiver_util.get_byte_response(img_node.url, self._headers, self.verify_ssl) - image_path = f"{image_base_path}/{img_node.name}" + image_path = 
f"{image_base_path}/{page_name}/{img_node.name}" self.write_data(image_path, img_data) def write_data(self, file_path: str, data: bytes): @@ -168,7 +165,8 @@ def gzip_archive(self): """provide the tar to gzip and the name of the gzip output file""" archiver_util.create_gzip(self.tar_file, self.archive_file) - def _update_image_links(self, page_data: bytes, image_nodes: List[ImageNode]) -> bytes: + def _update_image_links(self, page_name: str, page_data: bytes, + image_nodes: List[ImageNode]) -> bytes: """regex replace links to local created directories""" for img_node in image_nodes: img_meta_url = f"{self.api_urls['images']}/{img_node.id}" @@ -179,7 +177,7 @@ def _update_image_links(self, page_data: bytes, image_nodes: List[ImageNode]) -> continue # 1 - what to replace, 2 - replace with, 3 is the data to replace page_data = re.sub(img_node.markdown_str.encode(), - img_node.image_relative_path.encode(), page_data) + img_node.get_image_relative_path(page_name).encode(), page_data) return page_data @property diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index ab1714a..d63e17f 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -75,6 +75,11 @@ def children(self): """return all children of a book/chapter/shelf""" return self._children + @property + def parent(self): + """return parent of a book/chapter/page""" + return self._parent + @property def empty(self): """return True if page node lacks content""" pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy