# Usage: # mkdir -p ~/archivebox/data && cd ~/archivebox # curl -fsSL 'https://docker-compose.archivebox.io' > docker-compose.yml # docker compose run archivebox version # docker compose run archivebox config --set SAVE_ARCHIVE_DOT_ORG=False # docker compose run archivebox add --depth=1 'https://news.ycombinator.com' # docker compose run -T archivebox add < bookmarks.txt # docker compose up -d && open 'https://localhost:8000' # docker compose run archivebox help # Documentation: # https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose services: archivebox: image: archivebox/archivebox:latest ports: - 8000:8000 volumes: - ./data:/data # ./data/personas/Default/chrome_profile/Default:/data/personas/Default/chrome_profile/Default environment: # - ADMIN_USERNAME=admin # creates an admin user on first run with the given user/pass combo # - ADMIN_PASSWORD=SomeSecretPassword - ALLOWED_HOSTS=* # set this to the hostname(s) you're going to serve the site from! - CSRF_TRUSTED_ORIGINS=http://localhost:8000 # you MUST set this to the server's URL for admin login and the REST API to work - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive - SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use sonic container below for fast full-text search - SEARCH_BACKEND_HOST_NAME=sonic - SEARCH_BACKEND_PASSWORD=SomeSecretPassword # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues # - PGID=911 # UID/GIDs lower than 500 may clash with system uids and are not recommended # For options below, it's better to set in data/ArchiveBox.conf or use `docker compose run archivebox config --set SOME_KEY=someval` instead of setting here: # - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files # - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out # - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs) # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving # - USER_AGENT="..." # set a custom USER_AGENT to avoid being blocked as a bot # ... # For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration # For ad-blocking during archiving, uncomment this section and the pihole service below # networks: # - dns # dns: # - 172.20.0.53 ######## Optional Addons: tweak examples below as needed for your specific use case ######## ### This optional container runs scheduled jobs in the background (and retries failed ones). To add a new job: # $ docker compose run archivebox schedule --add --every=day --depth=1 'https://example.com/some/rss/feed.xml' # then restart the scheduler container to apply any changes to the scheduled task list: # $ docker compose restart archivebox_scheduler # https://github.com/ArchiveBox/ArchiveBox/wiki/Scheduled-Archiving archivebox_scheduler: image: archivebox/archivebox:latest command: schedule --foreground --update --every=day environment: # - PUID=911 # set to your host user's UID & GID if you encounter permissions issues # - PGID=911 - TIMEOUT=120 # use a higher timeout than the main container to give slow tasks more time when retrying - SEARCH_BACKEND_ENGINE=sonic # tells ArchiveBox to use sonic container below for fast full-text search - SEARCH_BACKEND_HOST_NAME=sonic - SEARCH_BACKEND_PASSWORD=SomeSecretPassword # For other config it's better to set using `docker compose run archivebox config --set SOME_KEY=someval` instead of setting here # ... # For more info, see: https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#configuration volumes: - ./data:/data # cpus: 2 # uncomment / edit these values to limit scheduler container resource consumption # mem_limit: 2048m # restart: always ### This runs the optional Sonic full-text search backend (much faster than default rg backend). # If Sonic is ever started after not running for a while, update its full-text index by running: # $ docker-compose run archivebox update --index-only # https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-up-Search sonic: image: archivebox/sonic:latest expose: - 1491 environment: - SEARCH_BACKEND_PASSWORD=SomeSecretPassword volumes: #- ./sonic.cfg:/etc/sonic.cfg:ro # mount to customize: https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/stable/etc/sonic.cfg - ./data/sonic:/var/lib/sonic/store ### This optional container runs xvfb+noVNC so you can watch the ArchiveBox browser as it archives things, # or remote control it to set up a chrome profile w/ login credentials for sites you want to archive. # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#setting-up-a-chromium-user-profile # https://github.com/ArchiveBox/ArchiveBox/wiki/Chromium-Install#docker-vnc-setup novnc: image: theasp/novnc:latest environment: - DISPLAY_WIDTH=1920 - DISPLAY_HEIGHT=1080 - RUN_XTERM=no ports: # to view/control ArchiveBox's browser, visit: http://127.0.0.1:8080/vnc.html # restricted to access from localhost by default because it has no authentication - 127.0.0.1:8080:8080 ### Example: Put Nginx in front of the ArchiveBox server for SSL termination and static file serving. # You can also any other ingress provider for SSL like Apache, Caddy, Traefik, Cloudflare Tunnels, etc. # nginx: # image: nginx:alpine # ports: # - 443:443 # - 80:80 # volumes: # - ./etc/nginx.conf:/etc/nginx/nginx.conf # - ./data:/var/www ### Example: To run pihole in order to block ad/tracker requests during archiving, # uncomment this optional block and set up pihole using its admin interface # pihole: # image: pihole/pihole:latest # ports: # # access the admin HTTP interface on http://localhost:8090 # - 127.0.0.1:8090:80 # environment: # - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD # - DNSMASQ_LISTENING=all # dns: # - 127.0.0.1 # - 1.1.1.1 # networks: # dns: # ipv4_address: 172.20.0.53 # volumes: # - ./etc/pihole:/etc/pihole # - ./etc/dnsmasq:/etc/dnsmasq.d ### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel to avoid IP blocks. # You can also use any other VPN that works at the docker/IP level, e.g. Tailscale, OpenVPN, etc. # wireguard: # image: linuxserver/wireguard:latest # network_mode: 'service:archivebox' # cap_add: # - NET_ADMIN # - SYS_MODULE # sysctls: # - net.ipv4.conf.all.rp_filter=2 # - net.ipv4.conf.all.src_valid_mark=1 # volumes: # - /lib/modules:/lib/modules # - ./wireguard.conf:/config/wg0.conf:ro ### Example: Run ChangeDetection.io to watch for changes to websites, then trigger ArchiveBox to archive them # Documentation: https://github.com/dgtlmoon/changedetection.io # More info: https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml # changedetection: # image: ghcr.io/dgtlmoon/changedetection.io # volumes: # - ./data-changedetection:/datastore ### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox # pywb: # image: webrecorder/pywb:latest # entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;' # environment: # - INIT_COLLECTION=archivebox # ports: # - 8686:8080 # volumes: # - ./data:/archivebox # - ./data/wayback:/webarchive networks: # network just used for pihole container to offer :53 dns resolving on fixed ip for archivebox container dns: ipam: driver: default config: - subnet: 172.20.0.0/24 # HOW TO: Set up cloud storage for your ./data/archive (e.g. Amazon S3, Backblaze B2, Google Drive, OneDrive, SFTP, etc.) # https://github.com/ArchiveBox/ArchiveBox/wiki/Setting-Up-Storage # # Follow the steps here to set up the Docker RClone Plugin https://rclone.org/docker/ # $ docker plugin install rclone/docker-volume-rclone:amd64 --grant-all-permissions --alias rclone # $ nano /var/lib/docker-plugins/rclone/config/rclone.conf # [examplegdrive] # type = drive # scope = drive # drive_id = 1234567... # root_folder_id = 0Abcd... # token = {"access_token":...} # volumes: # archive: # driver: rclone # driver_opts: # remote: 'examplegdrive:archivebox' # allow_other: 'true' # vfs_cache_mode: full # poll_interval: 0
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: