Skip to content

Commit cd8b59d

Browse files
author
Montana Low
committed
freeze all requirements and document the project requirements
1 parent c310e19 commit cd8b59d

File tree

9 files changed

+150
-57
lines changed

9 files changed

+150
-57
lines changed

packages/postgresml-python/build.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ rm "$deb_dir/release.sh"
2929
(cat ${SCRIPT_DIR}/DEBIAN/postrm | envsubst '${PGVERSION}') > "$deb_dir/DEBIAN/postrm"
3030

3131
cp ${SCRIPT_DIR}/../../pgml-extension/requirements.txt "$deb_dir/etc/postgresml-python/requirements.txt"
32-
cp ${SCRIPT_DIR}/../../pgml-extension/requirements-autogptq.txt "$deb_dir/etc/postgresml-python/requirements-autogptq.txt"
33-
cp ${SCRIPT_DIR}/../../pgml-extension/requirements-xformers.txt "$deb_dir/etc/postgresml-python/requirements-xformers.txt"
3432

3533
virtualenv --python="python$PYTHON_VERSION" "$deb_dir/var/lib/postgresml-python/pgml-venv"
3634
source "$deb_dir/var/lib/postgresml-python/pgml-venv/bin/activate"

pgml-cms/docs/resources/developer-docs/installation.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,7 @@ To install the necessary Python packages into a virtual environment, use the `vi
6363
```bash
6464
virtualenv pgml-venv && \
6565
source pgml-venv/bin/activate && \
66-
pip install -r requirements.txt && \
67-
pip install -r requirements-xformers.txt --no-dependencies
66+
pip install -r requirements.txt
6867
```
6968
{% endtab %}
7069

pgml-extension/examples/multi_classification.sql

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ LIMIT 10;
3131

3232
-- linear models
3333
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'ridge');
34-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'stochastic_gradient_descent');
35-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'perceptron');
36-
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'passive_aggressive');
34+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'stochastic_gradient_descent');
35+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'perceptron');
36+
--SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'passive_aggressive');
3737

3838
-- support vector machines
3939
SELECT * FROM pgml.train('Iris Flower Types', algorithm => 'svm');

pgml-extension/examples/transformers.sql

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ SELECT pgml.embed('intfloat/e5-small', 'hi mom', '{"device": "cpu"}');
88

99
SELECT pgml.embed('hkunlp/instructor-xl', 'hi mom', '{"instruction": "Encode it with love"}');
1010

11+
SELECT pgml.transform_stream(
12+
task => '{
13+
"task": "text-generation",
14+
"model": "TheBloke/zephyr-7B-beta-GPTQ",
15+
"model_type": "mistral",
16+
"revision": "main",
17+
"device_map": "auto"
18+
}'::JSONB,
19+
input => 'AI is going to',
20+
args => '{
21+
"max_new_tokens": 100
22+
}'::JSONB
23+
);
1124
-- BitsAndBytes support
1225
SELECT pgml.transform(
1326
task => '{

pgml-extension/requirements-autogptq.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

pgml-extension/requirements-xformers.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

pgml-extension/requirements.base.txt

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# The immediate dependencies of PostgresML are maintained here.
2+
3+
# ML
4+
catboost
5+
lightgbm
6+
torch==2.0.1 # 2.1.1 breaks sentence-transformers==2.2.2
7+
torchaudio
8+
torchvision
9+
xgboost
10+
11+
# Transformers
12+
accelerate
13+
auto-gptq; sys_platform == 'linux'
14+
bitsandbytes
15+
ctransformers
16+
huggingface-hub
17+
deepspeed
18+
einops
19+
tokenizers
20+
transformers
21+
xformers; sys_platform == 'linux'
22+
23+
# Embeddings
24+
InstructorEmbedding
25+
sentence-transformers
26+
27+
# Ratings
28+
rouge
29+
sacrebleu
30+
sacremoses
31+
32+
# Utils
33+
datasets
34+
orjson
35+
langchain

pgml-extension/requirements.txt

Lines changed: 89 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,98 @@
1-
accelerate==0.22.0
2-
bitsandbytes==0.41.1
3-
catboost==1.2
1+
accelerate==0.25.0
2+
aiohttp==3.9.1
3+
aiosignal==1.3.1
4+
annotated-types==0.6.0
5+
anyio==4.1.0
6+
attrs==23.1.0
7+
bitsandbytes==0.41.3.post2
8+
catboost==1.2.2
9+
certifi==2023.11.17
10+
charset-normalizer==3.3.2
11+
click==8.1.7
12+
colorama==0.4.6
13+
contourpy==1.2.0
414
ctransformers==0.2.27
5-
datasets==2.14.5
6-
deepspeed==0.10.3
7-
huggingface-hub==0.17.1
15+
cycler==0.12.1
16+
dataclasses-json==0.6.3
17+
datasets==2.15.0
18+
deepspeed==0.12.4
19+
dill==0.3.7
20+
einops==0.7.0
21+
filelock==3.13.1
22+
fonttools==4.46.0
23+
frozenlist==1.4.0
24+
fsspec==2023.10.0
25+
graphviz==0.20.1
26+
hjson==3.1.0
27+
huggingface-hub==0.19.4
28+
idna==3.6
829
InstructorEmbedding==1.0.1
30+
Jinja2==3.1.2
31+
joblib==1.3.2
32+
jsonpatch==1.33
33+
jsonpointer==2.4
34+
kiwisolver==1.4.5
35+
langchain==0.0.349
36+
langchain-community==0.0.1
37+
langchain-core==0.0.13
38+
langsmith==0.0.69
939
lightgbm==4.1.0
10-
orjson==3.9.7
11-
pandas==2.1.0
12-
rich==13.5.2
40+
lxml==4.9.3
41+
MarkupSafe==2.1.3
42+
marshmallow==3.20.1
43+
matplotlib==3.8.2
44+
mpmath==1.3.0
45+
multidict==6.0.4
46+
multiprocess==0.70.15
47+
mypy-extensions==1.0.0
48+
networkx==3.2.1
49+
ninja==1.11.1.1
50+
nltk==3.8.1
51+
numpy==1.26.2
52+
orjson==3.9.10
53+
packaging==23.2
54+
pandas==2.1.4
55+
Pillow==10.1.0
56+
plotly==5.18.0
57+
portalocker==2.8.2
58+
psutil==5.9.6
59+
py-cpuinfo==9.0.0
60+
pyarrow==14.0.1
61+
pyarrow-hotfix==0.6
62+
pydantic==2.5.2
63+
pydantic_core==2.14.5
64+
pynvml==11.5.0
65+
pyparsing==3.1.1
66+
python-dateutil==2.8.2
67+
pytz==2023.3.post1
68+
PyYAML==6.0.1
69+
regex==2023.10.3
70+
requests==2.31.0
1371
rouge==1.0.1
14-
sacrebleu==2.3.1
15-
sacremoses==0.0.53
16-
scikit-learn==1.3.0
17-
sentencepiece==0.1.99
72+
sacrebleu==2.3.3
73+
sacremoses==0.1.1
74+
safetensors==0.4.1
75+
scikit-learn==1.3.2
76+
scipy==1.11.4
1877
sentence-transformers==2.2.2
19-
tokenizers==0.14.1
78+
sentencepiece==0.1.99
79+
six==1.16.0
80+
sniffio==1.3.0
81+
SQLAlchemy==2.0.23
82+
sympy==1.12
83+
tabulate==0.9.0
84+
tenacity==8.2.3
85+
threadpoolctl==3.2.0
86+
tokenizers==0.15.0
2087
torch==2.0.1
2188
torchaudio==2.0.2
2289
torchvision==0.15.2
2390
tqdm==4.66.1
24-
transformers==4.34.1
25-
xgboost==2.0.0
26-
langchain==0.0.287
27-
einops==0.6.1
28-
pynvml==11.5.0
29-
transformers-stream-generator==0.0.4
30-
optimum==1.13.2
31-
peft==0.6.2
32-
pyarrow==11.0.0
91+
transformers==4.36.0
92+
typing-inspect==0.9.0
93+
typing_extensions==4.9.0
94+
tzdata==2023.3
95+
urllib3==2.1.0
96+
xgboost==2.0.2
97+
xxhash==3.4.1
98+
yarl==1.9.4

pgml-extension/src/bindings/transformers/transformers.py

Lines changed: 9 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import shutil
44
import time
55
import queue
6-
import sys
76

87
import datasets
98
from InstructorEmbedding import INSTRUCTOR
@@ -42,7 +41,6 @@
4241
Trainer,
4342
)
4443
from threading import Thread
45-
from typing import Optional
4644

4745
__cache_transformer_by_model_id = {}
4846
__cache_sentence_transformer_by_name = {}
@@ -393,42 +391,28 @@ def transform(task, args, inputs, stream=False):
393391
return orjson.dumps(pipe(inputs, **args), default=orjson_default).decode()
394392

395393

396-
def create_embedding(transformer):
394+
def embed(transformer, inputs, kwargs):
395+
kwargs = orjson.loads(kwargs)
396+
ensure_device(kwargs)
397397
instructor = transformer.startswith("hkunlp/instructor")
398-
klass = INSTRUCTOR if instructor else SentenceTransformer
399-
return klass(transformer)
400398

399+
# Cache the model
400+
if transformer not in __cache_sentence_transformer_by_name:
401+
klass = INSTRUCTOR if instructor else SentenceTransformer
402+
__cache_sentence_transformer_by_name[transformer] = klass(transformer)
403+
model = __cache_sentence_transformer_by_name[transformer]
401404

402-
def embed_using(model, transformer, inputs, kwargs):
403-
if isinstance(kwargs, str):
404-
kwargs = orjson.loads(kwargs)
405-
406-
instructor = transformer.startswith("hkunlp/instructor")
405+
# Handle instruction encoding
407406
if instructor:
408407
texts_with_instructions = []
409408
instruction = kwargs.pop("instruction")
410409
for text in inputs:
411410
texts_with_instructions.append([instruction, text])
412-
413411
inputs = texts_with_instructions
414412

415413
return model.encode(inputs, **kwargs)
416414

417415

418-
def embed(transformer, inputs, kwargs):
419-
kwargs = orjson.loads(kwargs)
420-
421-
ensure_device(kwargs)
422-
423-
if transformer not in __cache_sentence_transformer_by_name:
424-
__cache_sentence_transformer_by_name[transformer] = create_embedding(
425-
transformer
426-
)
427-
model = __cache_sentence_transformer_by_name[transformer]
428-
429-
return embed_using(model, transformer, inputs, kwargs)
430-
431-
432416
def clear_gpu_cache(memory_usage: None):
433417
if not torch.cuda.is_available():
434418
raise PgMLException(f"No GPU available")

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy