diff --git a/Dockerfile.tmpl b/Dockerfile.tmpl index 9ca776a9..a8a7880f 100644 --- a/Dockerfile.tmpl +++ b/Dockerfile.tmpl @@ -2,9 +2,6 @@ ARG BASE_IMAGE \ BASE_IMAGE_TAG \ LIGHTGBM_VERSION -{{ if eq .Accelerator "gpu" }} -FROM gcr.io/kaggle-images/python-lightgbm-whl:${BASE_IMAGE_TAG}-${LIGHTGBM_VERSION} AS lightgbm_whl -{{ end }} FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} #b/415358342: UV reports missing requirements files https://github.com/googlecolab/colabtools/issues/5237 @@ -14,15 +11,12 @@ ENV UV_CONSTRAINT= \ ADD kaggle_requirements.txt /kaggle_requirements.txt # Freeze existing requirements from base image for critical packages: -RUN pip freeze | grep -E 'tensorflow|keras|torch|jax|lightgbm' > /colab_requirements.txt +RUN pip freeze | grep -E 'tensorflow|keras|torch|jax' > /colab_requirements.txt # Merge requirements files: RUN cat /colab_requirements.txt >> /requirements.txt RUN cat /kaggle_requirements.txt >> /requirements.txt -# TODO: GPU requirements.txt -# TODO: merge them better (override matching ones). - # Install Kaggle packages RUN uv pip install --system -r /requirements.txt @@ -64,23 +58,6 @@ ADD patches/template_conf.json /opt/kaggle/conf.json # /opt/conda/lib/python3.11/site-packages ARG PACKAGE_PATH=/usr/local/lib/python3.11/dist-packages -# Install GPU-specific non-pip packages. -{{ if eq .Accelerator "gpu" }} -RUN uv pip install --system "pycuda" - -# b/381256047 Remove once installed in Colabs base image. 
-# Install LightGBM -COPY --from=lightgbm_whl /tmp/whl/*.whl /tmp/lightgbm/ -# Install OpenCL (required by LightGBM GPU version) -RUN apt-get install -y ocl-icd-libopencl1 clinfo && \ - mkdir -p /etc/OpenCL/vendors && \ - echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd && \ - uv pip install --system /tmp/lightgbm/*.whl && \ - rm -rf /tmp/lightgbm && \ - /tmp/clean-layer.sh -{{ end }} - - # Use a fixed apt-get repo to stop intermittent failures due to flaky httpredir connections, # as described by Lionel Chan at http://stackoverflow.com/a/37426929/5881346 RUN sed -i "s/httpredir.debian.org/debian.uchicago.edu/" /etc/apt/sources.list && \ @@ -100,27 +77,7 @@ ADD patches/keras_internal.py \ RUN apt-get install -y libfreetype6-dev && \ apt-get install -y libglib2.0-0 libxext6 libsm6 libxrender1 libfontconfig1 --fix-missing -# NLTK Project datasets -# b/408298750: We currently reinstall the package, because we get the following error: -# `AttributeError: module 'inspect' has no attribute 'formatargspec'. 
Did you mean: 'formatargvalues'?` RUN uv pip install --system --force-reinstall "nltk>=3.9.1" -RUN mkdir -p /usr/share/nltk_data && \ - # NLTK Downloader no longer continues smoothly after an error, so we explicitly list - # the corpuses that work - python -m nltk.downloader -d /usr/share/nltk_data abc alpino averaged_perceptron_tagger \ - basque_grammars biocreative_ppi bllip_wsj_no_aux \ - book_grammars brown brown_tei cess_cat cess_esp chat80 city_database cmudict \ - comtrans conll2000 conll2002 conll2007 crubadan dependency_treebank \ - europarl_raw floresta gazetteers genesis gutenberg \ - ieer inaugural indian jeita kimmo knbc large_grammars lin_thesaurus mac_morpho machado \ - masc_tagged maxent_ne_chunker maxent_treebank_pos_tagger moses_sample movie_reviews \ - mte_teip5 names nps_chat omw opinion_lexicon paradigms \ - pil pl196x porter_test ppattach problem_reports product_reviews_1 product_reviews_2 propbank \ - pros_cons ptb punkt punkt_tab qc reuters rslp rte sample_grammars semcor senseval sentence_polarity \ - sentiwordnet shakespeare sinica_treebank smultron snowball_data spanish_grammars \ - state_union stopwords subjectivity swadesh switchboard tagsets timit toolbox treebank \ - twitter_samples udhr2 udhr unicode_samples universal_tagset universal_treebanks_v20 \ - vader_lexicon verbnet webtext word2vec_sample wordnet wordnet_ic words ycoe RUN apt-get install -y git-lfs && \ # vtk dependencies diff --git a/config.txt b/config.txt index cfe8026a..1c378446 100644 --- a/config.txt +++ b/config.txt @@ -1,5 +1,4 @@ BASE_IMAGE=us-docker.pkg.dev/colab-images/public/runtime -BASE_IMAGE_TAG=release-colab_20250404-060113_RC00 -LIGHTGBM_VERSION=4.6.0 +BASE_IMAGE_TAG=release-colab_20250602-060052_RC00 CUDA_MAJOR_VERSION=12 CUDA_MINOR_VERSION=5 diff --git a/kaggle_requirements.txt b/kaggle_requirements.txt index 22b26470..8ba7bd83 100644 --- a/kaggle_requirements.txt +++ b/kaggle_requirements.txt @@ -1,6 +1,4 @@ # Please keep this in alphabetical order 
-Altair>=5.4.0 -Babel Boruta Cartopy ImageHash @@ -24,7 +22,6 @@ category-encoders cesium comm cytoolz -dask-expr # Older versions of datasets fail with "Loading a dataset cached in a LocalFileSystem is not supported" # https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor datasets>=2.14.6 @@ -35,7 +32,6 @@ easyocr # b/302136621: Fix eli5 import for learntools eli5 emoji -fastcore>=1.7.20 fasttext featuretools fiona @@ -138,18 +134,10 @@ shap==0.44.1 squarify tensorflow-cloud tensorflow-io -tensorflow-text -tensorflow_decision_forests -timm torchao torchinfo torchmetrics -torchtune -transformers>=4.51.0 -triton -tsfresh vtk -wandb wavio # b/350573866: xgboost v2.1.0 breaks learntools xgboost==2.0.3
