Skip to content

Commit da81ecf

Browse files
authored
Merge pull request #16 from postgresml/montana/housing
add the california housing example
2 parents f917003 + ff78f3e commit da81ecf

File tree

5 files changed

+76
-3
lines changed

5 files changed

+76
-3
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,4 +211,9 @@ Run the test:
211211
psql -f sql/test.sql
212212
```
213213

214+
One liner:
215+
```
216+
cd pgml; sudo python3 setup.py install; cd ../; sudo -u postgres psql -f sql/test.sql
217+
```
218+
214219
Make sure to run it exactly like this, from the root directory of the repo.

examples/california_housing/run.sql

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
-- This example trains models on the sklean california_housing dataset
2+
-- which is a copy of the test set from the StatLib repository
3+
-- https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html
4+
--
5+
-- This demonstrates using a table with individual columns as features
6+
-- for regression.
7+
SELECT pgml.load_dataset('california_housing');
8+
9+
-- view the dataset
10+
SELECT * from pgml.california_housing;
11+
12+
-- train a simple model to classify the data
13+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target');
14+
15+
-- check out the predictions
16+
SELECT target, pgml.predict('California Housing Prediction', ARRAY[median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude]) AS prediction
17+
FROM pgml.california_housing
18+
LIMIT 10;
19+
20+
-- -- train some more models with different algorithms
21+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'svm');
22+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'random_forest');
23+
SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'gradient_boosting_trees');
24+
-- TODO SELECT pgml.train('California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'dense_neural_network');
25+
-- -- check out all that hard work
26+
SELECT * FROM pgml.trained_models;
27+
28+
-- deploy the random_forest model for prediction use
29+
SELECT pgml.deploy('California Housing Prediction', 'random_forest');
30+
-- check out that throughput
31+
SELECT * FROM pgml.deployed_models;
32+
33+
-- do some hyper param tuning
34+
-- TODO SELECT pgml.hypertune(100, 'California Housing Prediction', 'regression', 'pgml.california_housing', 'target', 'gradient_boosted_trees');
35+
-- deploy the "best" model for prediction use
36+
SELECT pgml.deploy('California Housing Prediction', 'best_fit');
37+
38+
-- check out the improved predictions
39+
SELECT target, pgml.predict('California Housing Prediction', ARRAY[median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude]) AS prediction
40+
FROM pgml.california_housing
41+
LIMIT 10;

examples/digits/run.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
-- which is a copy of the test set of the UCI ML hand-written digits datasets
33
-- https://archive.ics.uci.edu/ml/datasets/Optical+Recognition+of+Handwritten+Digits
44
--
5+
-- This demonstrates using a table with a single array feature column
6+
-- for classification.
7+
--
58
-- The final result after a few seconds of training is not terrible. Maybe not perfect
69
-- enough for mission critical applications, but it's telling how quickly "off the shelf"
710
-- solutions can solve problems these days.

pgml/pgml/datasets.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,45 @@
11
import plpy
2-
from sklearn.datasets import load_digits as d
2+
import sklearn.datasets
33

44
from pgml.sql import q
55
from pgml.exceptions import PgMLException
66

77
def load(source: str):
88
if source == "digits":
99
load_digits()
10+
elif source == "california_housing":
11+
load_california_housing()
1012
else:
1113
raise PgMLException(f"Invalid dataset name: {source}. Valid values are ['digits'].")
1214
return "OK"
1315

1416
def load_digits():
15-
dataset = d()
17+
dataset = sklearn.datasets.load_digits()
1618
a = plpy.execute("DROP TABLE IF EXISTS pgml.digits")
1719
a = plpy.execute("CREATE TABLE pgml.digits (image SMALLINT[], target INTEGER)")
1820
a = plpy.execute(f"""COMMENT ON TABLE pgml.digits IS {q(dataset["DESCR"])}""")
1921
for X, y in zip(dataset["data"], dataset["target"]):
2022
X = ",".join("%i" % x for x in list(X))
2123
plpy.execute(f"""INSERT INTO pgml.digits (image, target) VALUES ('{{{X}}}', {y})""")
24+
25+
def load_california_housing():
26+
dataset = sklearn.datasets.fetch_california_housing()
27+
a = plpy.execute("DROP TABLE IF EXISTS pgml.california_housing")
28+
a = plpy.execute("""
29+
CREATE TABLE pgml.california_housing (
30+
median_income FLOAT4, -- median income in block group
31+
house_age FLOAT4, -- median house age in block group
32+
avg_rooms FLOAT4, -- average number of rooms per household
33+
avg_bedrooms FLOAT4, -- average number of bedrooms per household
34+
population FLOAT4, -- block group population
35+
avg_occupants FLOAT4, -- average number of household members
36+
latitude FLOAT4, -- block group latitude
37+
longitude FLOAT4, -- block group longitudetarget INTEGER
38+
target FLOAT4
39+
)""")
40+
a = plpy.execute(f"""COMMENT ON TABLE pgml.california_housing IS {q(dataset["DESCR"])}""")
41+
for X, y in zip(dataset["data"], dataset["target"]):
42+
plpy.execute(f"""
43+
INSERT INTO pgml.california_housing (median_income, house_age, avg_rooms, avg_bedrooms, population, avg_occupants, latitude, longitude, target)
44+
VALUES ({q(X[0])}, {q(X[1])}, {q(X[2])}, {q(X[3])}, {q(X[4])}, {q(X[5])}, {q(X[6])}, {q(X[7])}, {q(y)})""")
45+

sql/install.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ $$ LANGUAGE plpython3u;
136136
---
137137
--- Predict
138138
---
139-
CREATE OR REPLACE FUNCTION pgml.predict(project_name TEXT, features NUMERIC[])
139+
CREATE OR REPLACE FUNCTION pgml.predict(project_name TEXT, features DOUBLE PRECISION[])
140140
RETURNS DOUBLE PRECISION
141141
AS $$
142142
from pgml.model import Project

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy