diff --git a/CHANGELOG.md b/CHANGELOG.md index ea38e816..c8317c9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# [1.21.0](https://github.com/javascriptdata/scikit.js/compare/v1.20.0...v1.21.0) (2022-05-08) + + +### Features + +* not complete serialization ([7fbea07](https://github.com/javascriptdata/scikit.js/commit/7fbea07431dbd194259e44e868df74617814f6de)) +* updated serialization ([ec71323](https://github.com/javascriptdata/scikit.js/commit/ec713230efbf07ac228c0291c21f5fcc0b1bd995)) +* updated test ([260c134](https://github.com/javascriptdata/scikit.js/commit/260c1347d7c06c494b24b813c00c11d68c4da354)) + # [1.20.0](https://github.com/javascriptdata/scikit.js/compare/v1.19.0...v1.20.0) (2022-04-26) diff --git a/docs/convert.js b/docs/convert.js index ef76d67e..ddd5c88d 100644 --- a/docs/convert.js +++ b/docs/convert.js @@ -189,7 +189,6 @@ function getTypeName(val, bigObj) { } function generateProperties(jsonClass, bigObj) { - // console.log(jsonClass.children) let interface = getInterfaceForClass(jsonClass, bigObj) let allConstructorArgs = [] if (interface && interface.children) { diff --git a/package-lock.json b/package-lock.json index c40cd259..83c3e9fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,17 +1,18 @@ { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "hasInstallScript": true, "license": "ISC", "dependencies": { "@tensorflow/tfjs": "^3.16.0", "@tensorflow/tfjs-node": "^3.16.0", + "base64-arraybuffer": "^1.0.2", "lodash": "^4.17.21", "mathjs": "^10.0.0", "simple-statistics": "^7.7.0" @@ -4757,6 +4758,14 @@ "version": "1.0.2", "license": "MIT" }, + "node_modules/base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==", + "engines": { + "node": ">= 0.6.0" + } + }, "node_modules/base64id": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", @@ -19938,6 +19947,11 @@ "balanced-match": { "version": "1.0.2" }, + "base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==" + }, "base64id": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", diff --git a/package.json b/package.json index 2360944b..339f817f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "description": "Scikit-Learn for JS", "output": { "node": "dist/node/index.js", @@ -51,6 +51,7 @@ "dependencies": { "@tensorflow/tfjs": "^3.16.0", "@tensorflow/tfjs-node": "^3.16.0", + "base64-arraybuffer": "^1.0.2", "lodash": "^4.17.21", "mathjs": "^10.0.0", "simple-statistics": "^7.7.0" diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index 6aa15ea9..b5f3ce2a 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,5 +1,4 @@ -import { KMeans } from './KMeans' - +import { fromJSON, KMeans } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ @@ -38,7 +37,7 @@ describe('KMeans', () => { ) }) - it('should save kmeans model', () => { + it('should save kmeans model', async () => { const expectedResult = { name: 'KMeans', nClusters: 2, @@ -48,7 +47,7 @@ describe('KMeans', () => { randomState: 0, nInit: 10, clusterCenters: { - type: 'Tensor', + name: 'Tensor', value: [ [2.5, 1], [2.5, 4] @@ -57,20 +56,20 @@ describe('KMeans', () => { } const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = kmean.toJson() as string + const ksave = await kmean.toObject() - expect(expectedResult).toEqual(JSON.parse(ksave)) + expect(expectedResult).toEqual(ksave) }) - it('should load serialized kmeans model', () => { + it('should load serialized kmeans model', async () => { const centroids = [ [2.5, 1], [2.5, 4] ] const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = kmean.toJson() as string - const ksaveModel = new KMeans().fromJson(ksave) + const ksave = await kmean.toJSON() + const ksaveModel = await fromJSON(ksave) expect(centroids).toEqual(ksaveModel.clusterCenters.arraySync()) }) diff --git a/src/cluster/KMeans.ts b/src/cluster/KMeans.ts index d5810cd1..f154084e 100644 --- a/src/cluster/KMeans.ts +++ b/src/cluster/KMeans.ts @@ -1,6 +1,6 @@ import { Scikit2D } from '../types' import { convertToNumericTensor2D, sampleWithoutReplacement } from '../utils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* diff --git a/src/compose/ColumnTransformer.test.ts b/src/compose/ColumnTransformer.test.ts index 063da473..b31a918e 100644 --- a/src/compose/ColumnTransformer.test.ts +++ b/src/compose/ColumnTransformer.test.ts @@ -1,6 +1,9 @@ -import { ColumnTransformer } from './ColumnTransformer' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' -import { SimpleImputer } from '../impute/SimpleImputer' +import { + fromJSON, + SimpleImputer, + MinMaxScaler, + ColumnTransformer +} from '../index' import * as dfd from 'danfojs-node' describe('ColumnTransformer', function () { @@ -30,4 +33,26 @@ describe('ColumnTransformer', function () { expect(result.arraySync()).toEqual(expected) }) + it('ColumnTransformer serialize/deserialize test', async function () { + const X = [ + [2, 2], // [1, .5] + [2, 3], // [1, .75] + [0, NaN], // [0, 1] + [2, 0] // [.5, 0] + ] + let newDf = new dfd.DataFrame(X) + + const transformer = new ColumnTransformer({ + transformers: [ + ['minmax', new MinMaxScaler(), [0]], + ['simpleImpute', new SimpleImputer({ strategy: 'median' }), [1]] + ] + }) + + transformer.fitTransform(newDf) + let obj = await transformer.toJSON() + let myResult = await fromJSON(obj) + + expect(myResult.transformers.length).toEqual(2) + }) }) diff --git a/src/compose/ColumnTransformer.ts b/src/compose/ColumnTransformer.ts index 0f89059a..096337c1 100644 --- a/src/compose/ColumnTransformer.ts +++ b/src/compose/ColumnTransformer.ts @@ -1,5 +1,6 @@ -import { DataFrameInterface, Scikit1D, Scikit2D, Transformer } from '../types' -import { isDataFrameInterface, isScikitLike2D } from '../typesUtils' +import { DataFrameInterface, Scikit1D, Transformer } from '../types' +import { isDataFrameInterface } from '../typesUtils' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* Next steps: @@ -64,7 +65,7 @@ export interface ColumnTransformerParams { ] * ``` */ -export class ColumnTransformer { +export class ColumnTransformer extends Serialize { transformers: TransformerTriple remainder: Transformer | 'drop' | 'passthrough' @@ -75,6 +76,7 @@ export class ColumnTransformer { transformers = [], remainder = 'drop' }: ColumnTransformerParams = {}) { + super() this.transformers = transformers this.remainder = remainder } diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 858c5adc..b1f0e920 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,5 +1,4 @@ -import { DummyClassifier } from './DummyClassifier' - +import { DummyClassifier, fromJSON } from '../index' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() @@ -51,7 +50,7 @@ describe('DummyClassifier', function () { expect(scaler.classes).toEqual([1, 2, 3]) }) - it('should serialize DummyClassifier', function () { + it('should serialize DummyClassifier', async function () { const clf = new DummyClassifier() const X = [ @@ -70,10 +69,10 @@ describe('DummyClassifier', function () { } clf.fit(X, y) - const clfSave = clf.toJson() as string - expect(expectedResult).toEqual(JSON.parse(clfSave)) + const clfSave = await clf.toObject() + expect(expectedResult).toEqual(clfSave) }) - it('should load DummyClassifier', function () { + it('should load DummyClassifier', async function () { const clf = new DummyClassifier() const X = [ @@ -85,8 +84,8 @@ describe('DummyClassifier', function () { const y = [10, 20, 20, 30] clf.fit(X, y) - const clfSave = clf.toJson() as string - const newClf = new DummyClassifier().fromJson(clfSave) + const clfSave = await clf.toJSON() + const newClf = await fromJSON(clfSave) expect(clf).toEqual(newClf) }) }) diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index 04299b7e..6c79feb7 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,4 +1,4 @@ -import { DummyRegressor } from './DummyRegressor' +import { DummyRegressor, fromJSON } from '../index' describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { @@ -55,7 +55,7 @@ describe('DummyRegressor', function () { reg.fit(X, y) expect(reg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) - it('Should save DummyRegressor', function () { + it('Should save DummyRegressor', async function () { const reg = new DummyRegressor({ strategy: 'constant', constant: 10 }) const X = [ @@ -68,15 +68,16 @@ describe('DummyRegressor', function () { name: 'DummyRegressor', EstimatorType: 'regressor', strategy: 'constant', - constant: 10 + constant: 10, + quantile: undefined } reg.fit(X, y) - expect(saveResult).toEqual(JSON.parse(reg.toJson() as string)) + expect(saveResult).toEqual(await reg.toObject()) }) - it('Should load serialized DummyRegressor', function () { + it('Should load serialized DummyRegressor', async function () { const reg = new DummyRegressor({ strategy: 'constant', constant: 10 }) const X = [ @@ -92,8 +93,8 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = reg.toJson() as string - const newReg = new DummyRegressor().fromJson(saveReg) + const saveReg = await reg.toJSON() + const newReg = await fromJSON(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) diff --git a/src/ensemble/VotingClassifier.test.ts b/src/ensemble/VotingClassifier.test.ts index d741c00a..cafb9973 100644 --- a/src/ensemble/VotingClassifier.test.ts +++ b/src/ensemble/VotingClassifier.test.ts @@ -1,7 +1,10 @@ -import { makeVotingClassifier, VotingClassifier } from './VotingClassifier' -import { DummyClassifier } from '../dummy/DummyClassifier' - -import { LogisticRegression } from '../linear_model/LogisticRegression' +import { + makeVotingClassifier, + VotingClassifier, + DummyClassifier, + LogisticRegression, + fromJSON +} from '../index' describe('VotingClassifier', function () { it('Use VotingClassifier on simple example (voting = hard)', async function () { @@ -118,8 +121,8 @@ describe('VotingClassifier', function () { await voter.fit(X, y) - const savedModel = (await voter.toJson()) as string - const newModel = new VotingClassifier({}).fromJson(savedModel) + const savedModel = await voter.toJSON() + const newModel = await fromJSON(savedModel) expect(newModel.predict(X).arraySync()).toEqual([1, 1, 1, 1, 1]) }, 30000) diff --git a/src/ensemble/VotingClassifier.ts b/src/ensemble/VotingClassifier.ts index 5db1241e..ce93c91a 100644 --- a/src/ensemble/VotingClassifier.ts +++ b/src/ensemble/VotingClassifier.ts @@ -2,7 +2,6 @@ import { Scikit1D, Scikit2D } from '../types' import { tf } from '../shared/globals' import { ClassifierMixin } from '../mixins' import { LabelEncoder } from '../preprocessing/LabelEncoder' -import { fromJson, toJson } from './serializeEnsemble' /* Next steps: @@ -154,15 +153,6 @@ export class VotingClassifier extends ClassifierMixin { ): Promise | Array> { return (await this.fit(X, y)).transform(X) } - - public fromJson(model: string) { - return fromJson(this, model) - } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } } export function makeVotingClassifier(...args: any[]) { diff --git a/src/ensemble/VotingRegressor.test.ts b/src/ensemble/VotingRegressor.test.ts index 06a69f97..7782ab86 100644 --- a/src/ensemble/VotingRegressor.test.ts +++ b/src/ensemble/VotingRegressor.test.ts @@ -1,6 +1,10 @@ -import { makeVotingRegressor, VotingRegressor } from './VotingRegressor' -import { DummyRegressor } from '../dummy/DummyRegressor' -import { LinearRegression } from '../linear_model/LinearRegression' +import { + makeVotingRegressor, + VotingRegressor, + fromJSON, + DummyRegressor, + LinearRegression +} from '../index' describe('VotingRegressor', function () { it('Use VotingRegressor on simple example ', async function () { @@ -51,8 +55,8 @@ describe('VotingRegressor', function () { await voter.fit(X, y) - const savedModel = (await voter.toJson()) as string - const newModel = new VotingRegressor({}).fromJson(savedModel) + const savedModel = await voter.toJSON() + const newModel = await fromJSON(savedModel) expect(newModel.score(X, y)).toEqual(voter.score(X, y)) }, 30000) }) diff --git a/src/ensemble/VotingRegressor.ts b/src/ensemble/VotingRegressor.ts index db3d4973..41396e4d 100644 --- a/src/ensemble/VotingRegressor.ts +++ b/src/ensemble/VotingRegressor.ts @@ -1,7 +1,6 @@ import { Scikit1D, Scikit2D } from '../types' import { tf } from '../shared/globals' import { RegressorMixin } from '../mixins' -import { fromJson, toJson } from './serializeEnsemble' /* Next steps: 0. Write validation code to check Estimator inputs @@ -95,15 +94,6 @@ export class VotingRegressor extends RegressorMixin { public async fitTransform(X: Scikit2D, y: Scikit1D) { return (await this.fit(X, y)).transform(X) } - - public fromJson(model: string) { - return fromJson(this, model) as this - } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } } /** diff --git a/src/ensemble/serializeEnsemble.ts b/src/ensemble/serializeEnsemble.ts deleted file mode 100644 index 245c89df..00000000 --- a/src/ensemble/serializeEnsemble.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { DummyClassifier } from '../dummy/DummyClassifier' -import { DummyRegressor } from '../dummy/DummyRegressor' -import { LogisticRegression } from '../linear_model/LogisticRegression' -import { RidgeRegression } from '../linear_model/RidgeRegression' -import { LinearRegression } from '../linear_model/LinearRegression' -import { LassoRegression } from '../linear_model/LassoRegression' -import { ElasticNet } from '../linear_model/ElasticNet' -import { LabelEncoder } from '../preprocessing/LabelEncoder' -import { SimpleImputer } from '../impute/SimpleImputer' -import { tf } from '../shared/globals' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' - -function getEstimator(name: string, serialJson: string) { - switch (name) { - case 'DummyClassifier': - return new DummyClassifier().fromJson(serialJson) - case 'DummyRegressor': - return new DummyRegressor().fromJson(serialJson) - case 'LogisticRegression': - return new LogisticRegression().fromJson(serialJson) - case 'RidgeRegression': - return new RidgeRegression().fromJson(serialJson) - case 'LinearRegression': - return new LinearRegression().fromJson(serialJson) - case 'LassoRegression': - return new LassoRegression().fromJson(serialJson) - case 'ElasticNet': - return new ElasticNet().fromJson(serialJson) - case 'SimpleImputer': - return new SimpleImputer().fromJson(serialJson) - case 'MinMaxScaler': - return new MinMaxScaler().fromJson(serialJson) - default: - throw new Error(`${name} estimator not supported`) - } -} - -export function fromJson(classConstructor: any, model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != classConstructor.name) { - throw new Error( - `wrong json values for ${classConstructor.name} constructor` - ) - } - - const copyThis: any = Object.assign({}, classConstructor) - for (let key of Object.keys(classConstructor)) { - let value = copyThis[key] - if (value instanceof tf.Tensor) { - jsonClass[key] = tf.tensor(jsonClass[key]) - } - } - // for ensembles - if (jsonClass.estimators || jsonClass.steps) { - const jsonEstimatorOrStep = jsonClass.estimators || jsonClass.steps - for (let i = 0; i < jsonEstimatorOrStep.length; i++) { - const estimatorName = JSON.parse(jsonEstimatorOrStep[i][1]).name - const estimators = getEstimator(estimatorName, jsonEstimatorOrStep[i][1]) - jsonEstimatorOrStep[i][1] = Object.assign( - estimators, - jsonEstimatorOrStep[i][1] - ) - } - } - - if (jsonClass.le) { - const labelEncode = new LabelEncoder() - jsonClass.le = Object.assign(labelEncode, jsonClass.le) - } - return Object.assign(classConstructor, jsonClass) -} - -export async function toJson(classConstructor: any, classJson: any) { - let i = 0 - if (classConstructor.estimators) { - for (const estimator of classConstructor.estimators) { - classJson.estimators[i][1] = await estimator[1].toJson() - i += 1 - } - } - - if (classConstructor.steps) { - for (const step of classConstructor.steps) { - classJson.steps[i][1] = await step[1].toJson() - i += 1 - } - } - - return JSON.stringify(classJson) -} diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index fb58af50..d9957b5e 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,5 +1,5 @@ import { tf } from '../shared/globals' -import { SimpleImputer } from './SimpleImputer' +import { SimpleImputer, fromJSON } from '../index' describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { @@ -119,7 +119,7 @@ describe('SimpleImputer', function () { expect(returned.arraySync()).toEqual(expected) expect(imputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) }) - it('Should serialized Imputer', function () { + it('Should serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) const data = [ @@ -129,21 +129,21 @@ describe('SimpleImputer', function () { [4, 2], [6, NaN] ] - const expected = { name: 'SimpleImputer', - missingValues: null, + missingValues: NaN, + fillValue: undefined, strategy: 'mostFrequent', statistics: { - type: 'Tensor', + name: 'Tensor', value: [4, 3] } } - const returned = imputer.fitTransform(data) - expect(JSON.parse(imputer.toJson() as string)).toEqual(expected) + imputer.fitTransform(data) + expect(await imputer.toObject()).toEqual(expected) }) - it('Should load serialized Imputer', function () { + it('Should load serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) const data = [ @@ -162,8 +162,9 @@ describe('SimpleImputer', function () { [6, 3] ] - const returned = imputer.fitTransform(data) - const newImputer = new SimpleImputer().fromJson(imputer.toJson() as string) + imputer.fitTransform(data) + const thing = await imputer.toJSON() + const newImputer = await fromJSON(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) expect(newImputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) diff --git a/src/index.ts b/src/index.ts index 6ab8fb07..95c1e491 100644 --- a/src/index.ts +++ b/src/index.ts @@ -13,6 +13,7 @@ * ========================================================================== */ export { KNeighborsRegressor } from './neighbors/KNeighborsRegressor' +export { KNeighborsClassifier } from './neighbors/KNeighborsClassifier' export { LinearRegression, LinearRegressionParams @@ -83,3 +84,11 @@ export { DecisionTreeRegressor, DecisionTreeRegressorParams } from './tree/DecisionTree' +export { KFold } from './model_selection/KFold' +export { trainTestSplit } from './model_selection/trainTestSplit' +export { crossValScore } from './model_selection/crossValScore' +export { fromObject, fromJSON, Serialize } from './simpleSerializer' + +export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' +export { Splitter } from './tree/Splitter' +export { DecisionTreeBase, DecisionTree } from './tree/DecisionTree' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index a4d67295..1a85e235 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,7 +1,6 @@ -import { LinearRegression } from './LinearRegression' +import { LinearRegression, fromJSON } from '../index' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' - function roughlyEqual(a: number, b: number, tol = 0.1) { return Math.abs(a - b) < tol } @@ -148,8 +147,8 @@ describe('LinearRegression', function () { const lr = new LinearRegression({ fitIntercept: false }) await lr.fit(mediumX, yPlusJitter) - const serialized = await lr.toJson() - const newModel = new LinearRegression({}).fromJson(serialized) + const serialized = await lr.toJSON() + const newModel = await fromJSON(serialized) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index c00db0fa..be3fe8e1 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,6 +1,5 @@ -import { LogisticRegression } from './LogisticRegression' +import { LogisticRegression, fromJSON } from '../index' import { tf } from '../shared/globals' - describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { const lr = new LogisticRegression() @@ -133,8 +132,8 @@ describe('LogisticRegression', function () { let logreg = new LogisticRegression({ penalty: 'l2' }) await logreg.fit(X, y) - const serializeModel = await logreg.toJson() - const newModel = logreg.fromJson(serializeModel) + const serializeModel = await logreg.toJSON() + const newModel = await fromJSON(serializeModel) const newModelResult = newModel.predict(Xtest) expect(newModelResult.arraySync()).toEqual([0, 0, 0, 0, 0, 0, 2, 2, 2]) diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts index b5900a39..06f3f539 100644 --- a/src/linear_model/SgdClassifier.ts +++ b/src/linear_model/SgdClassifier.ts @@ -20,7 +20,7 @@ import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { OneHotEncoder } from '../preprocessing/OneHotEncoder' import { assert } from '../typesUtils' import { ClassifierMixin } from '../mixins' -import { fromJson, toJSON } from './modelSerializer' + /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can @@ -404,13 +404,4 @@ export class SGDClassifier extends ClassifierMixin { private getModelWeight(): Promise> { return Promise.all(this.model.getWeights().map((weight) => weight.array())) } - - public async toJson(): Promise { - const classifierJson = JSON.parse(super.toJson() as string) - return toJSON(this, classifierJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } diff --git a/src/linear_model/SgdRegressor.ts b/src/linear_model/SgdRegressor.ts index 917ac73c..e178889f 100644 --- a/src/linear_model/SgdRegressor.ts +++ b/src/linear_model/SgdRegressor.ts @@ -20,7 +20,7 @@ import { } from '../utils' import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { RegressorMixin } from '../mixins' -import { fromJson, toJSON } from './modelSerializer' + /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can @@ -380,13 +380,4 @@ export class SGDRegressor extends RegressorMixin { return intercept } - - public async toJson(): Promise { - const classifierJson = JSON.parse(super.toJson() as string) - return toJSON(this, classifierJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } diff --git a/src/linear_model/modelSerializer.ts b/src/linear_model/modelSerializer.ts deleted file mode 100644 index aac0aeba..00000000 --- a/src/linear_model/modelSerializer.ts +++ /dev/null @@ -1,91 +0,0 @@ -import { optimizer, initializer, getLoss } from '../utils' -import { tf } from '../shared/globals' -import { OneHotEncoder } from '../preprocessing/OneHotEncoder' - -function getModelWeight( - model: tf.Sequential -): Promise> { - return Promise.all(model.getWeights().map((weight) => weight.array())) -} - -export async function toJSON( - classConstructor: any, - classifierJson: any -): Promise { - const modelConfig = classConstructor.model.getConfig() - const modelWeight = await getModelWeight(classConstructor.model) - classifierJson.model = { - config: modelConfig, - weight: modelWeight - } - - if (classConstructor.denseLayerArgs.kernelInitializer) { - const initializerName = - classConstructor.denseLayerArgs.kernelInitializer.constructor.name - classifierJson.denseLayerArgs.kernelInitializer = initializerName - } - if (classConstructor.denseLayerArgs.biasInitializer) { - const biasName = - classConstructor.denseLayerArgs.biasInitializer.constructor.name - classifierJson.denseLayerArgs.biasInitializer = biasName - } - // set optimizer - classifierJson.modelCompileArgs.optimizer = - classConstructor.model.optimizer.getConfig() - return JSON.stringify(classifierJson) -} - -export function fromJson(classConstructor: any, model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != classConstructor.name) { - throw new Error( - `wrong json values for ${classConstructor.name} constructor` - ) - } - - const jsonModel = tf.Sequential.fromConfig( - tf.Sequential, - jsonClass.model.config - ) as tf.Sequential - const jsonOpt = optimizer(jsonClass.optimizerType) - const optim = Object.assign(jsonOpt, jsonClass.modelCompileArgs.optimizer) - const loss = getLoss(jsonClass.lossType) - jsonClass.modelCompileArgs = { - ...jsonClass.modelCompileArgs, - optimizer: optim, - loss: loss - } - - jsonModel.compile(jsonClass.modelCompileArgs) - const weights = [] - for (const weight of jsonClass.model.weight) { - weights.push(tf.tensor(weight)) - } - jsonModel.setWeights(weights) - jsonClass.model = jsonModel - - // if call back create callback - // default usecase is set to EarlyStop - // might get complex for custom callback - if (jsonClass.modelFitArgs.callbacks) { - let jsonCallback = tf.callbacks.earlyStopping() - let modelFitArgs = jsonClass.modelFitArgs - jsonCallback = Object.assign(jsonCallback, modelFitArgs.callbacks[0]) - modelFitArgs.callbacks = [jsonCallback] - } - - if (jsonClass.denseLayerArgs.kernelInitializer) { - let initializerName = jsonClass.denseLayerArgs.kernelInitializer - jsonClass.denseLayerArgs.kernelInitializer = initializer(initializerName) - } - if (jsonClass.denseLayerArgs.biasInitializer) { - let biasName = jsonClass.denseLayerArgs.biasInitializer - jsonClass.denseLayerArgs.biasInitializer = initializer(biasName) - } - - if (jsonClass.oneHot) { - let jsonOneHotEncoder = new OneHotEncoder() - jsonClass.oneHot = Object.assign(jsonOneHotEncoder, jsonClass.oneHot) - } - return Object.assign(classConstructor, jsonClass) -} diff --git a/src/mixins.ts b/src/mixins.ts index d826e455..20085282 100644 --- a/src/mixins.ts +++ b/src/mixins.ts @@ -1,6 +1,6 @@ import { Scikit2D, Scikit1D } from './types' import { r2Score, accuracyScore } from './metrics/metrics' -import Serialize from './serialize' +import { Serialize } from './simpleSerializer' import { tf } from './shared/globals' export class TransformerMixin extends Serialize { // We assume that fit and transform exist diff --git a/src/model_selection/KFold.test.ts b/src/model_selection/KFold.test.ts index 625e0c0d..3b0678fb 100644 --- a/src/model_selection/KFold.test.ts +++ b/src/model_selection/KFold.test.ts @@ -14,7 +14,7 @@ */ import * as fc from 'fast-check' -import { KFold } from './KFold' +import { KFold } from '../index' import { alea } from '../randUtils' import '../jestTensorMatchers' import { tf } from '../shared/globals' diff --git a/src/naive_bayes/BaseNaiveBayes.ts b/src/naive_bayes/BaseNaiveBayes.ts index 563c8fa3..fd48b42b 100644 --- a/src/naive_bayes/BaseNaiveBayes.ts +++ b/src/naive_bayes/BaseNaiveBayes.ts @@ -16,7 +16,7 @@ import { polyfillUnique } from '../tfUtils' import { tf } from '../shared/globals' import { Scikit1D, Scikit2D } from '../types' import { convertToNumericTensor2D, convertToTensor1D } from '../utils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export interface NaiveBayesParams { /** @@ -152,37 +152,4 @@ export abstract class BaseNaiveBayes extends Serialize { mean: tf.Tensor1D, variance: tf.Tensor1D ): tf.Tensor1D - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (this.priors) { - jsonClass.priors = this.priors.arraySync() - } - jsonClass.classes = this.classes.arraySync() - jsonClass.means = this.means.map((t: tf.Tensor1D) => t.arraySync()) - jsonClass.variances = this.variances.map((v: tf.Tensor1D) => v.arraySync()) - return JSON.stringify(jsonClass) - } - - public fromJson(model: string) { - const jsonModel = JSON.parse(model) - - if (jsonModel.priors) { - jsonModel.priors = tf.tensor(jsonModel.priors) - } - jsonModel.classes = tf.tensor(jsonModel.classes) - - const means = [] - for (const wMeans of jsonModel.means) { - means.push(tf.tensor(wMeans)) - } - const variances = [] - for (const variance of jsonModel.variances) { - variances.push(tf.tensor(variance)) - } - jsonModel.means = means - jsonModel.variances = variances - return Object.assign(this, jsonModel) as this - } } diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index 627018e8..5cdfc9bb 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -12,7 +12,7 @@ * limitations under the License. * ========================================================================== */ -import { GaussianNB } from './GaussianNB' +import { GaussianNB, fromJSON } from '../index' describe('GaussianNB', function () { it('without priors', async () => { @@ -99,10 +99,10 @@ describe('GaussianNB', function () { const model = new GaussianNB({ priors: [0.5, 0.5], varSmoothing: 1.0 }) await model.fit(X, y) - const labels = model.predict(X) + model.predict(X) - const serializeModel = model.toJson() - const newModel = new GaussianNB().fromJson(serializeModel) + const serializeModel = await model.toJSON() + const newModel = await fromJSON(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) }) diff --git a/src/neighbors/KNeighborsBase.ts b/src/neighbors/KNeighborsBase.ts index 3b4dd715..31774e18 100644 --- a/src/neighbors/KNeighborsBase.ts +++ b/src/neighbors/KNeighborsBase.ts @@ -21,7 +21,7 @@ import { convertToNumericTensor1D, convertToNumericTensor2D } from '../utils' import { assert } from '../typesUtils' import { tf } from '../shared/globals' import { KdTree } from './KdTree' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' const WEIGHTS_FUNCTIONS = { uniform(distances: tf.Tensor2D) { diff --git a/src/neighbors/KNeighborsClassifier.test.ts b/src/neighbors/KNeighborsClassifier.test.ts index 559a72df..b69464fc 100644 --- a/src/neighbors/KNeighborsClassifier.test.ts +++ b/src/neighbors/KNeighborsClassifier.test.ts @@ -13,7 +13,7 @@ * ========================================================================== */ -import { KNeighborsClassifier } from './KNeighborsClassifier' +import { KNeighborsClassifier } from '../index' import { KNeighborsParams } from './KNeighborsBase' import { dataUrls } from '../datasets/datasets' import { crossValScore } from '../model_selection/crossValScore' diff --git a/src/neighbors/KNeighborsRegressor.test.ts b/src/neighbors/KNeighborsRegressor.test.ts index 50c5c418..751fb8fb 100644 --- a/src/neighbors/KNeighborsRegressor.test.ts +++ b/src/neighbors/KNeighborsRegressor.test.ts @@ -13,12 +13,10 @@ * ========================================================================== */ -import { KNeighborsRegressor } from './KNeighborsRegressor' +import { KNeighborsRegressor, crossValScore, KFold } from '../index' import { KNeighborsParams } from './KNeighborsBase' import { dataUrls } from '../datasets/datasets' import { arrayEqual } from '../utils' -import { crossValScore } from '../model_selection/crossValScore' -import { KFold } from '../model_selection/KFold' import { negMeanSquaredError } from '../model_selection/scorers' import '../jestTensorMatchers' import * as dfd from 'danfojs-node' diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 52ead335..8a9ac2ec 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -1,9 +1,13 @@ -import { Pipeline, makePipeline } from './Pipeline' +import { + Pipeline, + makePipeline, + LinearRegression, + SimpleImputer, + MinMaxScaler, + fromJSON +} from '../index' import { tf } from '../shared/globals' import { tensorEqual } from '../utils' -import { LinearRegression } from '../linear_model/LinearRegression' -import { SimpleImputer } from '../impute/SimpleImputer' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' describe('Pipeline', function () { it('Use a Pipeline (min-max scaler, and linear regression)', async function () { @@ -96,8 +100,8 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = (await pipeline.toJson()) as string - const newPipeLine = new Pipeline().fromJson(saveModel) + const saveModel = await pipeline.toJSON() + const newPipeLine = await fromJSON(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) expect( diff --git a/src/pipeline/Pipeline.ts b/src/pipeline/Pipeline.ts index 6cf5e6c4..a2c5e923 100644 --- a/src/pipeline/Pipeline.ts +++ b/src/pipeline/Pipeline.ts @@ -1,8 +1,7 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { assert } from '../typesUtils' import { Scikit1D, Scikit2D } from '../types' -import Serialize from '../serialize' -import { toJson, fromJson } from '../ensemble/serializeEnsemble' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* @@ -206,15 +205,6 @@ export class Pipeline extends Serialize { let XT = this.fitTransformExceptLast(X) return await lastEstimator.fitPredict(XT, y) } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } /** diff --git a/src/preprocessing/LabelEncoder.test.ts b/src/preprocessing/LabelEncoder.test.ts index 062929c8..b56ede1a 100644 --- a/src/preprocessing/LabelEncoder.test.ts +++ b/src/preprocessing/LabelEncoder.test.ts @@ -1,4 +1,4 @@ -import { LabelEncoder } from './LabelEncoder' +import { LabelEncoder } from '../index' import * as dfd from 'danfojs-node' describe('LabelEncoder', function () { diff --git a/src/preprocessing/LabelEncoder.ts b/src/preprocessing/LabelEncoder.ts index 9067e707..6173f31b 100644 --- a/src/preprocessing/LabelEncoder.ts +++ b/src/preprocessing/LabelEncoder.ts @@ -16,7 +16,7 @@ import { Scikit1D } from '../types' import { tf } from '../shared/globals' import { isSeriesInterface } from '../typesUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' /* Next steps: diff --git a/src/preprocessing/MaxAbsScaler.test.ts b/src/preprocessing/MaxAbsScaler.test.ts index b69f584a..f742bdcc 100644 --- a/src/preprocessing/MaxAbsScaler.test.ts +++ b/src/preprocessing/MaxAbsScaler.test.ts @@ -1,4 +1,4 @@ -import { MaxAbsScaler } from './MaxAbsScaler' +import { MaxAbsScaler, fromJSON } from '../index' import * as dfd from 'danfojs-node' import { tf } from '../shared/globals' import { arrayEqual } from '../utils' @@ -135,6 +135,20 @@ describe('MaxAbsScaler', function () { expect(arrayEqual(X_trans_new, X_expected_new, 0.01)).toBe(true) }) + it('Serialize and unserialize MaxAbsScaler', async function () { + const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) + const scaler = new MaxAbsScaler() + scaler.fit(data) + const serial = await scaler.toJSON() + const newModel = await fromJSON(serial) + expect(newModel.transform(data).arraySync().flat()).toEqual([ + 1, + 1, + NaN, + 0.75, + 0.75 + ]) + }) /* Streaming test def test_maxabs_scaler_partial_fit(): # Test if partial_fit run over many batches of size 1 and 50 diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index 7b15cc47..a4efd446 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -1,9 +1,8 @@ -import { MinMaxScaler } from './MinMaxScaler' +import { MinMaxScaler, fromJSON } from '../index' import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' import { tf } from '../shared/globals' - export function convertTensorToInputType( tensor: tf.Tensor, inputData: ScikitVecOrMatrix @@ -161,12 +160,12 @@ describe('MinMaxscaler', function () { 0 ]) }) - it('Serialize and unserialize MinMaxScaler', function () { + it('Serialize and unserialize MinMaxScaler', async function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = scaler.toJson() as string - const newModel = new MinMaxScaler().fromJson(serial) + const serial = await scaler.toJSON() + const newModel = await fromJSON(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/preprocessing/Normalizer.test.ts b/src/preprocessing/Normalizer.test.ts index 65d20877..b202c377 100644 --- a/src/preprocessing/Normalizer.test.ts +++ b/src/preprocessing/Normalizer.test.ts @@ -1,4 +1,4 @@ -import { Normalizer } from './Normalizer' +import { Normalizer } from '../index' import * as dfd from 'danfojs-node' import { arrayEqual } from '../utils' diff --git a/src/preprocessing/OneHotEncoder.test.ts b/src/preprocessing/OneHotEncoder.test.ts index 35100413..2ddd7fce 100644 --- a/src/preprocessing/OneHotEncoder.test.ts +++ b/src/preprocessing/OneHotEncoder.test.ts @@ -1,5 +1,5 @@ import { tf } from '../shared/globals' -import { OneHotEncoder } from './OneHotEncoder' +import { OneHotEncoder } from '../index' import { arrayTo2DColumn } from '../utils' describe('OneHotEncoder', function () { diff --git a/src/preprocessing/OrdinalEncoder.test.ts b/src/preprocessing/OrdinalEncoder.test.ts index 57666262..53438b53 100644 --- a/src/preprocessing/OrdinalEncoder.test.ts +++ b/src/preprocessing/OrdinalEncoder.test.ts @@ -1,4 +1,4 @@ -import { OrdinalEncoder } from './OrdinalEncoder' +import { OrdinalEncoder } from '../index' import { arrayTo2DColumn } from '../utils' describe('OrdinalEncoder', function () { diff --git a/src/preprocessing/RobustScaler.test.ts b/src/preprocessing/RobustScaler.test.ts index 3a31475e..3653c9d4 100644 --- a/src/preprocessing/RobustScaler.test.ts +++ b/src/preprocessing/RobustScaler.test.ts @@ -1,4 +1,4 @@ -import { RobustScaler } from './RobustScaler' +import { RobustScaler } from '../index' import * as dfd from 'danfojs-node' import { arrayEqual } from '../utils' diff --git a/src/preprocessing/StandardScaler.test.ts b/src/preprocessing/StandardScaler.test.ts index e3830ef7..d80621d6 100644 --- a/src/preprocessing/StandardScaler.test.ts +++ b/src/preprocessing/StandardScaler.test.ts @@ -1,4 +1,4 @@ -import { StandardScaler } from './StandardScaler' +import { StandardScaler } from '../index' import * as dfd from 'danfojs-node' describe('StandardScaler', function () { diff --git a/src/serialize.ts b/src/serialize.ts deleted file mode 100644 index 2ef36c94..00000000 --- a/src/serialize.ts +++ /dev/null @@ -1,50 +0,0 @@ -/** - * A Generic class to serialized and Unserialized classes (models, transformers, - * or any operator) - */ - -import { tf } from './shared/globals' -export default class Serialize { - public name = 'Serialize' // default name for all inherited class - - /** - * Serialize all [inherited] class property into - * a json string - * @returns Json string - */ - public toJson(): string | Promise { - const thisCopy: any = Object.assign({}, this) - for (const key of Object.keys(thisCopy)) { - let value = thisCopy[key] - if (value instanceof tf.Tensor) { - thisCopy[key] = { - type: 'Tensor', - value: value.arraySync() - } - } - } - return JSON.stringify(thisCopy) - } - - /** - * Initialize [inherited] class from serialized - * json string - * @param model string - * @returns [Inherited] Class - */ - public fromJson(model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != this.name) { - throw new Error(`wrong json values for ${this.name} constructor`) - } - - for (let key of Object.keys(jsonClass)) { - let value = jsonClass[key] - if (typeof value === 'object' && value?.type === 'Tensor') { - jsonClass[key] = tf.tensor(jsonClass[key].value) - } - } - - return Object.assign(this, jsonClass) as this - } -} diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts new file mode 100644 index 00000000..6b54b764 --- /dev/null +++ b/src/simpleSerializer.ts @@ -0,0 +1,234 @@ +import { tf } from './shared/globals' +import { encode, decode } from 'base64-arraybuffer' +const EstimatorList = [ + 'KNeighborsRegressor', + 'LinearRegression', + 'LassoRegression', + 'RidgeRegression', + 'ElasticNet', + 'LogisticRegression', + 'DummyRegressor', + 'DummyClassifier', + 'MinMaxScaler', + 'StandardScaler', + 'MaxAbsScaler', + 'SimpleImputer', + 'OneHotEncoder', + 'LabelEncoder', + 'OrdinalEncoder', + 'Normalizer', + 'Pipeline', + 'ColumnTransformer', + 'RobustScaler', + 'KMeans', + 'VotingRegressor', + 'VotingClassifier', + 'LinearSVC', + 'LinearSVR', + 'GaussianNB', + 'DecisionTreeClassifier', + 'DecisionTreeRegressor', + 'ClassificationCriterion', + 'RegressionCriterion', + 'Splitter', + 'DecisionTreeBase', + 'DecisionTree' +] + +/** + * 1. Make a list called EstimatorList + * 2. Do a dynamic import here + */ + +class JSONHandler { + savedArtifacts: any + constructor(artifacts?: any) { + this.savedArtifacts = artifacts || null + } + + async save(artifacts: any) { + // Base 64 encoding + artifacts.weightData = encode(artifacts.weightData) + this.savedArtifacts = artifacts + return { + modelArtifactsInfo: { + dateSaved: new Date(), + modelTopologyType: 'JSON', + modelTopologyBytes: JSON.stringify(artifacts.modelTopology).length, + weightSpecsBytes: JSON.stringify(artifacts.weightSpecs).length, + weightDataBytes: artifacts.weightData.byteLength + } + } + } + + async load() { + // Base64 decode + this.savedArtifacts.weightData = decode(this.savedArtifacts.weightData) + return this.savedArtifacts + } +} + +export async function toObjectInner( + val: any, + ignoreKeys: string[] = [] +): Promise { + if (['number', 'string', 'undefined', 'boolean'].includes(typeof val)) { + return val + } + + if (typeof val === 'function') { + console.warn( + `warning: Serializing function ${val}. Not going to be able to deserialize this later.` + ) + if (val.name) { + return val.name + } + } + + if (typeof val === 'object') { + // Null case + if (val === null) { + return null + } + // Array case + if (Array.isArray(val)) { + return await Promise.all( + val.map(async (el) => await toObjectInner(el, ignoreKeys)) + ) + } + + // Serialize a Tensor + if (val instanceof tf.Tensor) { + return { + name: 'Tensor', + value: val.arraySync() + } + } + + // Int32Array serialization. Used for DecisionTrees + if (val instanceof Int32Array) { + return { + name: 'Int32Array', + value: Array.from(val) + } + } + + // The tf object + if (val.ENV && val.AdadeltaOptimizer && val.version) { + return { + name: 'TF', + version: val.version.tfjs + } + } + + // tf.layers model + if (val instanceof tf.Sequential) { + let mem = new JSONHandler() + await val.save(mem as any) + return { + name: 'Sequential', + artifacts: mem.savedArtifacts + } + } + + // Generic object case / class case + let response: any = {} + for (let key of Object.keys(val)) { + // Ignore all the keys that we choose to + if (ignoreKeys.includes(key)) { + continue + } + // Ignore any function when we serialize + // if (typeof val[key] === 'function') { + // continue + // } + response[key] = await toObjectInner(val[key], ignoreKeys) + } + return response + } +} + +export async function fromObjectInner(val: any): Promise { + // Ignores all types that aren't objects + if (typeof val !== 'object') { + return val + } + + // Null case + if (val === null) { + return null + } + + // Make a Tensor + if (val.name === 'Tensor') { + return tf.tensor(val.value) + } + + if (val.name === 'Sequential') { + let newMem = new JSONHandler(val.artifacts) + return await tf.loadLayersModel(newMem as any) + } + + if (val.name === 'Int32Array') { + return new Int32Array(val.value) + } + + // Array case + if (Array.isArray(val)) { + return await Promise.all(val.map(async (el) => await fromObjectInner(el))) + } + + // Generic object case + for (let key of Object.keys(val)) { + val[key] = await fromObjectInner(val[key]) + } + + // Make a model + if (EstimatorList.includes(val.name)) { + // Do dynamic import to avoid circular dependency tree + // Every class extends this class and therefor it + // can't import those classes in here + let module = await import('./index') + let model = (module as any)[val.name] + + let resultObj = new model(val) + for (let key of Object.keys(val)) { + resultObj[key] = val[key] + } + return resultObj + } + + return val +} + +export async function fromObject(val: any): Promise { + try { + return await fromObjectInner(val) + } catch (e) { + console.error(e) + } +} + +export async function fromJSON(val: string): Promise { + return await fromObject(JSON.parse(val)) +} + +let ignoredKeysForSGDRegressor = [ + 'modelCompileArgs', + 'modelFitArgs', + 'denseLayerArgs' +] + +export class Serialize { + async toObject(): Promise { + try { + return await toObjectInner(this, ignoredKeysForSGDRegressor) + } catch (e) { + console.error(e) + } + } + + async toJSON(): Promise { + return JSON.stringify(await this.toObject()) + } +} diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index c58617fc..76e88960 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' - +import { fromJSON } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -15,7 +15,7 @@ describe('Criterion', function () { sampleMap[i] = i } it('Use the criterion (init)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) expect(criterion.start).toEqual(0) @@ -29,7 +29,7 @@ describe('Criterion', function () { expect(criterion.labelFreqsRight[1]).toEqual(0) }, 1000) it('Use the criterion (update)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) criterion.update(3, sampleMap) @@ -40,20 +40,23 @@ describe('Criterion', function () { expect(criterion.labelFreqsRight[1]).toEqual(3) }, 1000) it('Use the criterion (gini)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) expect(criterion.nodeImpurity()).toEqual(0.5) }, 1000) it('Use the criterion (entropy)', async function () { - let criterion = new ClassificationCriterion('entropy', y) + let criterion = new ClassificationCriterion({ + impurityMeasure: 'entropy', + y + }) criterion.init(0, 6, sampleMap) expect(criterion.nodeImpurity()).toEqual(1) }, 1000) it('Use the criterion (gini update)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) criterion.update(4, sampleMap) @@ -75,10 +78,13 @@ describe('Criterion', function () { expect(entropy(labelFreqs, nSamples)).toEqual(0.7219280948873623) }, 1000) it('Use the criterion (entropy)', async function () { - let criterion = new ClassificationCriterion('entropy', y) + let criterion = new ClassificationCriterion({ + impurityMeasure: 'entropy', + y + }) criterion.init(0, 6, sampleMap) - const serial = criterion.toJson() as string - const newCriterion = ClassificationCriterion.fromJson(serial) + const serial = await criterion.toJSON() + const newCriterion = await fromJSON(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) }) diff --git a/src/tree/Criterion.ts b/src/tree/Criterion.ts index d7fd6cae..ef846390 100644 --- a/src/tree/Criterion.ts +++ b/src/tree/Criterion.ts @@ -1,6 +1,5 @@ -import { assert } from '../typesUtils' import { int } from '../randUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export type ImpurityMeasure = 'gini' | 'entropy' | 'squared_error' @@ -43,7 +42,7 @@ function arrayMax(labels: int[]) { export class ClassificationCriterion extends Serialize { y: int[] impurityMeasure: ImpurityMeasure - impurityFunc: (labelFreqs: int[], nSamples: int) => number + // impurityFunc: (labelFreqs: int[], nSamples: int) => number start: int = 0 end: int = 0 pos: int = 0 @@ -54,21 +53,19 @@ export class ClassificationCriterion extends Serialize { nSamples: int = 0 nSamplesLeft: int = 0 nSamplesRight: int = 0 - name = 'classificationCriterion' - - constructor(impurityMeasure: ImpurityMeasure, y: number[]) { + name = 'ClassificationCriterion' + + constructor({ + impurityMeasure, + y + }: { + impurityMeasure: ImpurityMeasure + y: number[] + }) { super() - assert( - ['gini', 'entropy'].includes(impurityMeasure), - 'Unkown impurity measure. Only supports gini, and entropy' - ) this.impurityMeasure = impurityMeasure - if (this.impurityMeasure === 'gini') { - this.impurityFunc = giniCoefficient - } else { - this.impurityFunc = entropy - } + // This assumes that the labels are 0,1,2,...,(n-1) this.nLabels = arrayMax(y) + 1 this.y = y @@ -116,12 +113,12 @@ export class ClassificationCriterion extends Serialize { } childrenImpurities() { + let impurityFunc = + this.impurityMeasure === 'gini' ? giniCoefficient : entropy + return { - impurityLeft: this.impurityFunc(this.labelFreqsLeft, this.nSamplesLeft), - impurityRight: this.impurityFunc( - this.labelFreqsRight, - this.nSamplesRight - ) + impurityLeft: impurityFunc(this.labelFreqsLeft, this.nSamplesLeft), + impurityRight: impurityFunc(this.labelFreqsRight, this.nSamplesRight) } } @@ -134,7 +131,10 @@ export class ClassificationCriterion extends Serialize { } nodeImpurity() { - return this.impurityFunc(this.labelFreqsTotal, this.nSamples) + let impurityFunc = + this.impurityMeasure === 'gini' ? giniCoefficient : entropy + + return impurityFunc(this.labelFreqsTotal, this.nSamples) } nodeValue() { @@ -143,10 +143,10 @@ export class ClassificationCriterion extends Serialize { static fromJson(model: string) { const jsonClass = JSON.parse(model) - const newModel = new ClassificationCriterion( - jsonClass.impurityMeasure, - jsonClass.y - ) + const newModel = new ClassificationCriterion({ + impurityMeasure: jsonClass.impurityMeasure, + y: jsonClass.y + }) return Object.assign(newModel, jsonClass) } } @@ -154,7 +154,7 @@ export class ClassificationCriterion extends Serialize { export class RegressionCriterion extends Serialize { y: number[] impurityMeasure: 'squared_error' - impurityFunc: (ySquaredSum: number, ySum: number, nSamples: int) => number + // impurityFunc: (ySquaredSum: number, ySum: number, nSamples: int) => number start: int = 0 end: int = 0 pos: int = 0 @@ -167,18 +167,23 @@ export class RegressionCriterion extends Serialize { nSamples: int = 0 nSamplesLeft: int = 0 nSamplesRight: int = 0 - name = 'regressionCriterion' - - constructor(impurityMeasure: 'squared_error', y: number[]) { + name = 'RegressionCriterion' + + constructor({ + impurityMeasure, + y + }: { + impurityMeasure: 'squared_error' + y: number[] + }) { super() - assert( - ['squared_error'].includes(impurityMeasure), - 'Unkown impurity measure. Only supports squared_error' - ) + + // We don't assert in the constructor, we assert in fit in accordance with the sklearn docs // Support MAE one day this.impurityMeasure = impurityMeasure - this.impurityFunc = mse + // We don't set the impurityFunc here because we need it to be serializable as an object + // this.impurityFunc = mse this.y = y } @@ -224,13 +229,15 @@ export class RegressionCriterion extends Serialize { } childrenImpurities() { + // once we get another impurity function we can do a ternary here + let impurityFunc = mse return { - impurityLeft: this.impurityFunc( + impurityLeft: impurityFunc( this.squaredSumLeft, this.sumTotalLeft, this.nSamplesLeft ), - impurityRight: this.impurityFunc( + impurityRight: impurityFunc( this.squaredSumRight, this.sumTotalRight, this.nSamplesRight @@ -247,7 +254,9 @@ export class RegressionCriterion extends Serialize { } nodeImpurity() { - return this.impurityFunc(this.squaredSum, this.sumTotal, this.nSamples) + // once we get another impurity function we can do a ternary here + let impurityFunc = mse + return impurityFunc(this.squaredSum, this.sumTotal, this.nSamples) } nodeValue() { @@ -256,10 +265,10 @@ export class RegressionCriterion extends Serialize { static fromJson(model: string) { const jsonClass = JSON.parse(model) - const newModel = new RegressionCriterion( - jsonClass.impurityMeasure, - jsonClass.y - ) + const newModel = new RegressionCriterion({ + impurityMeasure: jsonClass.impurityMeasure, + y: jsonClass.y + }) return Object.assign(newModel, jsonClass) } } diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index 7e1a5207..71652fac 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,6 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' +import { fromJSON } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -620,8 +621,8 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = tree_classifier.toJson() - const newTree = new DecisionTreeClassifier().fromJson(serial) + const serial = await tree_classifier.toJSON() + const newTree = await fromJSON(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) }) diff --git a/src/tree/DecisionTree.ts b/src/tree/DecisionTree.ts index 292dca08..8e654f6a 100644 --- a/src/tree/DecisionTree.ts +++ b/src/tree/DecisionTree.ts @@ -8,7 +8,7 @@ import { validateX, validateY } from './utils' import { Scikit1D, Scikit2D } from '../types' import { convertScikit2DToArray, convertScikit1DToArray } from '../utils' import { LabelEncoder } from '../preprocessing/LabelEncoder' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' /* Next steps: @@ -48,9 +48,10 @@ function argMax(array: number[]) { return array.map((x, i) => [x, i]).reduce((r, a) => (a[0] > r[0] ? a : r))[1] } -class DecisionTree { +export class DecisionTree { nodes: Node[] = [] isBuilt = false + name = 'DecisionTree' getLeafNodes(X: number[][]): int[] { let leafNodeIds: int[] = [] @@ -140,7 +141,7 @@ interface DecisionTreeBaseParams { minImpurityDecrease?: number } -class DecisionTreeBase extends Serialize { +export class DecisionTreeBase extends Serialize { splitter!: Splitter stack: NodeRecord[] = [] minSamplesLeaf: int @@ -154,6 +155,7 @@ class DecisionTreeBase extends Serialize { X: number[][] = [] y: number[] = [] labelEncoder?: LabelEncoder + name: string constructor({ criterion = 'gini', @@ -173,6 +175,7 @@ class DecisionTreeBase extends Serialize { this.minImpurityDecrease = minImpurityDecrease this.maxFeaturesNumb = 0 this.tree = new DecisionTree() + this.name = 'DecisionTreeBase' } calcMaxFeatures( nFeatures: int, @@ -203,14 +206,14 @@ class DecisionTreeBase extends Serialize { // CheckNegativeLabels(yptr); this.maxFeaturesNumb = this.calcMaxFeatures(X[0].length, this.maxFeatures) - this.splitter = new Splitter( + this.splitter = new Splitter({ X, y, - this.minSamplesLeaf, - this.criterion, - this.maxFeaturesNumb, - newSamplesSubset - ) + minSamplesLeaf: this.minSamplesLeaf, + impurityMeasure: this.criterion, + maxFeatures: this.maxFeaturesNumb, + samplesSubset: newSamplesSubset + }) // put root node on stack let rootNode: NodeRecord = { @@ -298,37 +301,6 @@ class DecisionTreeBase extends Serialize { this.tree.populateChildIds() this.tree.isBuilt = true } - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (this.splitter) { - jsonClass.splitter = this.splitter.toJson() as string - } - if (this.labelEncoder) { - jsonClass.labelEncoder = this.labelEncoder.toJson() - } - return JSON.stringify(jsonClass) - } - - public fromJson(model: string) { - const jsonClass = JSON.parse(model) - - if (jsonClass.tree) { - const tree = new DecisionTree() - jsonClass.tree = Object.assign(tree, jsonClass.tree) - } - - if (jsonClass.splitter) { - jsonClass.splitter = Splitter.fromJson(jsonClass.splitter) - } - if (jsonClass.labelEncoder) { - jsonClass.labelEncoder = new LabelEncoder().fromJson( - jsonClass.labelEncoder - ) - } - return Object.assign(this, jsonClass) as this - } } export interface DecisionTreeClassifierParams { diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index ff7d7a6c..620aa37e 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,5 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' +import { fromJSON } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -8,7 +9,14 @@ describe('Splitter', function () { let y = [0, 0, 0, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(0) @@ -21,7 +29,14 @@ describe('Splitter', function () { let y = [1, 1, 0, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(0) @@ -34,7 +49,14 @@ describe('Splitter', function () { let y = [1, 0, 1, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(-0.5) expect(bestSplit.feature).toEqual(0) @@ -47,7 +69,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 1, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(false) expect(bestSplit.threshold).toEqual(0) @@ -60,7 +89,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 2, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 4, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 4, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(0) @@ -73,7 +109,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 2, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 4, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 4, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(0) @@ -95,7 +138,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 1, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 20, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 20, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(1) @@ -117,7 +167,14 @@ describe('Splitter', function () { let y = [2, 1, 1, 2, 1, 2, 2, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 20, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 20, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(1) @@ -137,11 +194,18 @@ describe('Splitter', function () { [0, 1] ] let y = [2, 1, 1, 2, 1, 2, 2, 1] - let splitter = new Splitter(X, y, 1, 'gini', 20, []) - let bestSplit = splitter.splitNode() - const serial = splitter.toJson() - const newSpliter = Splitter.fromJson(serial) - const newBestSplitter = newSpliter.splitNode() + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: 'gini', + maxFeatures: 20, + samplesSubset: [] + }) + splitter.splitNode() + const serial = await splitter.toJSON() + const newSplitter = await fromJSON(serial) + const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) expect(newBestSplitter.feature).toEqual(1) expect(newBestSplitter.threshold).toEqual(2.5) diff --git a/src/tree/Splitter.ts b/src/tree/Splitter.ts index ce408d2a..8a9c9be8 100644 --- a/src/tree/Splitter.ts +++ b/src/tree/Splitter.ts @@ -5,7 +5,7 @@ import { } from './Criterion' import shuffle from 'lodash/shuffle' import { int } from '../randUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export interface Split { feature: int @@ -41,16 +41,23 @@ export class Splitter extends Serialize { sampleMap: Int32Array nSamplesTotal: int nFeatures: int - name = 'splitter' - - constructor( - X: number[][], - y: int[], - minSamplesLeaf: int, - impurityMeasure: ImpurityMeasure, - maxFeatures: int, - samplesSubset: int[] = [] - ) { + name = 'Splitter' + + constructor({ + X, + y, + minSamplesLeaf, + impurityMeasure, + maxFeatures, + samplesSubset = [] + }: { + X: number[][] + y: int[] + minSamplesLeaf: int + impurityMeasure: ImpurityMeasure + maxFeatures: int + samplesSubset: int[] + }) { super() this.X = X this.y = y @@ -74,9 +81,9 @@ export class Splitter extends Serialize { } } if (impurityMeasure === 'squared_error') { - this.criterion = new RegressionCriterion(impurityMeasure, y) + this.criterion = new RegressionCriterion({ impurityMeasure, y }) } else { - this.criterion = new ClassificationCriterion(impurityMeasure, y) + this.criterion = new ClassificationCriterion({ impurityMeasure, y }) } this.featureOrder = [] for (let i = 0; i < this.nFeatures; i++) { @@ -208,43 +215,4 @@ export class Splitter extends Serialize { return currentSplit } } - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (jsonClass.criterion) { - jsonClass.criterion = this.criterion.toJson() as string - } - if (this.sampleMap) jsonClass.sampleMap = Array.from(this.sampleMap) - return JSON.stringify(jsonClass) - } - - static fromJson(model: string) { - const jsonClass = JSON.parse(model) - - if (jsonClass.criterion) { - const criterionName = JSON.parse(jsonClass.criterion).name - if (criterionName == 'classificationCriterion') { - jsonClass.criterion = ClassificationCriterion.fromJson( - jsonClass.criterion - ) - } else { - jsonClass.criterion = RegressionCriterion.fromJson(jsonClass.criterion) - } - } - - if (jsonClass.sampleMap) { - jsonClass.sampleMap = new Int32Array(jsonClass.sampleMap) - } - - const splitter = new Splitter( - jsonClass.X, - jsonClass.y, - jsonClass.minSamplesLeaf, - 'squared_error', - jsonClass.samplesSubset - ) - - return Object.assign(splitter, jsonClass) as Splitter - } } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy