diff --git a/README.md b/README.md index 9e6a5c1a2..6d0caf828 100644 --- a/README.md +++ b/README.md @@ -195,69 +195,9 @@ Colocating the compute with the data inside the database removes one of the most -### Installation in WSL or Ubuntu +### Development -Install Python3, pip, and Pl/Python3: - -```bash -sudo apt update -sudo apt install -y postgresql-plpython3-12 python3 python3-pip -``` - -Restart the Postgres server: - -```bash -sudo service postgresql restart -``` - -Create the extension: - -```sql -CREATE EXTENSION plpython3u; -``` - -Install Scikit globally (I didn't bother setup Postgres with a virtualenv, but it's possible): - -``` -sudo pip3 install sklearn -``` - -### Run the example - -```bash -psql -f scikit_train_and_predict.sql -``` - -Example output: - -``` -psql:scikit_train_and_predict.sql:4: NOTICE: drop cascades to view scikit_train_view -DROP TABLE -CREATE TABLE -psql:scikit_train_and_predict.sql:14: NOTICE: view "scikit_train_view" does not exist, skipping -DROP VIEW -CREATE VIEW -INSERT 0 500 -CREATE FUNCTION - scikit_learn_train_example ----------------------------- - OK -(1 row) - -CREATE FUNCTION - value | weight | prediction --------+--------+------------ - 1 | 5 | 5 - 2 | 5 | 5 - 3 | 5 | 5 - 4 | 5 | 5 - 5 | 5 | 5 -(5 rows) -``` - -### Run the linear model - -Install our PgML package globally: +Follow the installation instructions to create a local working Postgres environment, then install your PgML package from the git repository: ``` cd pgml diff --git a/scikit_import.sql b/scikit_import.sql deleted file mode 100644 index 1afcc6e66..000000000 --- a/scikit_import.sql +++ /dev/null @@ -1,10 +0,0 @@ -CREATE EXTENSION IF NOT EXISTS plpython3u; - -CREATE OR REPLACE FUNCTION pgml_version() -RETURNS TEXT -AS $$ - import pgml - return pgml.version() -$$ LANGUAGE plpython3u; - -SELECT pgml_version(); diff --git a/scikit_train_and_predict.sql b/scikit_train_and_predict.sql deleted file mode 100644 index 6f8b5c990..000000000 --- a/scikit_train_and_predict.sql +++ /dev/null @@ -1,71 +0,0 @@ --- --- CREATE EXTENSION --- -CREATE EXTENSION IF NOT EXISTS plpython3u; - --- --- Data table. --- -DROP TABLE IF EXISTS scikit_train_data CASCADE; -CREATE TABLE scikit_train_data ( - id BIGSERIAL PRIMARY KEY, - value BIGINT, - weight DOUBLE PRECISION -); - --- --- View of the data table, just to demonstrate that views work. --- -DROP VIEW IF EXISTS scikit_train_view; -CREATE VIEW scikit_train_view AS SELECT * FROM scikit_train_data; - --- --- Insert some dummy data into the data table. --- -INSERT INTO scikit_train_data (value, weight) SELECT generate_series(1, 500), 5.0; - - -CREATE OR REPLACE FUNCTION scikit_learn_train_example() -RETURNS BYTEA -AS $$ - from sklearn.ensemble import RandomForestClassifier - import pickle - - cursor = plpy.cursor("SELECT value, weight FROM scikit_train_view") - X = [] - y = [] - - while True: - rows = cursor.fetch(5) - if not rows: - break - for row in rows: - X.append([row["value"],]) - y.append(row["weight"]) - rfc = RandomForestClassifier() - rfc.fit(X, y) - - return pickle.dumps(rfc) - -$$ LANGUAGE plpython3u; - -; - -CREATE OR REPLACE FUNCTION scikit_learn_predict_example(model BYTEA, value INT) -RETURNS DOUBLE PRECISION -AS $$ - import pickle - - m = pickle.loads(model) - - r = m.predict([[value,]]) - return r[0] -$$ LANGUAGE plpython3u; - -WITH model as ( - SELECT scikit_learn_train_example() AS pickle -) -SELECT value, - weight, - scikit_learn_predict_example((SELECT model.pickle FROM model), value::int) AS prediction -FROM scikit_train_view LIMIT 5; diff --git a/benchmarks.sql b/sql/benchmarks.sql similarity index 100% rename from benchmarks.sql rename to sql/benchmarks.sql
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: