From 7fbea07431dbd194259e44e868df74617814f6de Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Tue, 3 May 2022 21:51:56 -0700 Subject: [PATCH 1/4] feat: not complete serialization --- src/cluster/KMeans.test.ts | 16 +- src/cluster/KMeans.ts | 2 +- src/compose/ColumnTransformer.ts | 7 +- src/dummy/DummyClassifier.test.ts | 14 +- src/dummy/DummyRegressor.test.ts | 12 +- src/impute/SimpleImputer.test.ts | 21 +- src/index.ts | 8 + src/linear_model/LinearRegression.test.ts | 8 +- src/linear_model/LogisticRegression.test.ts | 6 +- src/linear_model/SgdClassifier.ts | 9 + src/linear_model/SgdRegressor.ts | 10 + src/mixins.ts | 3 +- src/naive_bayes/GaussianNB.test.ts | 5 +- src/pipeline/Pipeline.test.ts | 6 +- src/preprocessing/MinMaxScaler.test.ts | 8 +- src/simpleSerializer.ts | 223 ++++++++++++++++++++ src/tree/Criterion.test.ts | 24 ++- src/tree/Criterion.ts | 88 ++++---- src/tree/DecisionTree.test.ts | 5 +- src/tree/DecisionTree.ts | 18 +- src/tree/Splitter.test.ts | 90 ++++++-- src/tree/Splitter.ts | 46 ++-- 22 files changed, 489 insertions(+), 140 deletions(-) create mode 100644 src/simpleSerializer.ts diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index 6aa15ea9..ab7f65ca 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,5 +1,5 @@ import { KMeans } from './KMeans' - +import { fromObject } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ @@ -38,7 +38,7 @@ describe('KMeans', () => { ) }) - it('should save kmeans model', () => { + it('should save kmeans model', async () => { const expectedResult = { name: 'KMeans', nClusters: 2, @@ -48,7 +48,7 @@ describe('KMeans', () => { randomState: 0, nInit: 10, clusterCenters: { - type: 'Tensor', + name: 'Tensor', value: [ [2.5, 1], [2.5, 4] @@ -57,20 +57,20 @@ describe('KMeans', () => { } const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = kmean.toJson() as string + const ksave = await kmean.toObject() - expect(expectedResult).toEqual(JSON.parse(ksave)) + expect(expectedResult).toEqual(ksave) }) - it('should load serialized kmeans model', () => { + it('should load serialized kmeans model', async () => { const centroids = [ [2.5, 1], [2.5, 4] ] const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = kmean.toJson() as string - const ksaveModel = new KMeans().fromJson(ksave) + const ksave = await kmean.toObject() + const ksaveModel = await fromObject(ksave) expect(centroids).toEqual(ksaveModel.clusterCenters.arraySync()) }) diff --git a/src/cluster/KMeans.ts b/src/cluster/KMeans.ts index d5810cd1..f154084e 100644 --- a/src/cluster/KMeans.ts +++ b/src/cluster/KMeans.ts @@ -1,6 +1,6 @@ import { Scikit2D } from '../types' import { convertToNumericTensor2D, sampleWithoutReplacement } from '../utils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* diff --git a/src/compose/ColumnTransformer.ts b/src/compose/ColumnTransformer.ts index 0f89059a..7109b65b 100644 --- a/src/compose/ColumnTransformer.ts +++ b/src/compose/ColumnTransformer.ts @@ -1,5 +1,6 @@ -import { DataFrameInterface, Scikit1D, Scikit2D, Transformer } from '../types' -import { isDataFrameInterface, isScikitLike2D } from '../typesUtils' +import { DataFrameInterface, Scikit1D, Transformer } from '../types' +import { isDataFrameInterface } from '../typesUtils' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* Next steps: @@ -64,7 +65,7 @@ export interface ColumnTransformerParams { ] * ``` */ -export class ColumnTransformer { +export class ColumnTransformer extends Serialize { transformers: TransformerTriple remainder: Transformer | 'drop' | 'passthrough' diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 858c5adc..419a986b 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,5 +1,5 @@ import { DummyClassifier } from './DummyClassifier' - +import { fromObject } from '../simpleSerializer' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() @@ -51,7 +51,7 @@ describe('DummyClassifier', function () { expect(scaler.classes).toEqual([1, 2, 3]) }) - it('should serialize DummyClassifier', function () { + it('should serialize DummyClassifier', async function () { const clf = new DummyClassifier() const X = [ @@ -70,10 +70,10 @@ describe('DummyClassifier', function () { } clf.fit(X, y) - const clfSave = clf.toJson() as string - expect(expectedResult).toEqual(JSON.parse(clfSave)) + const clfSave = await clf.toObject() + expect(expectedResult).toEqual(clfSave) }) - it('should load DummyClassifier', function () { + it('should load DummyClassifier', async function () { const clf = new DummyClassifier() const X = [ @@ -85,8 +85,8 @@ describe('DummyClassifier', function () { const y = [10, 20, 20, 30] clf.fit(X, y) - const clfSave = clf.toJson() as string - const newClf = new DummyClassifier().fromJson(clfSave) + const clfSave = await clf.toObject() + const newClf = await fromObject(clfSave) expect(clf).toEqual(newClf) }) }) diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index 04299b7e..ebbe652d 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,5 +1,5 @@ import { DummyRegressor } from './DummyRegressor' - +import { toObject, fromObject } from '../simpleSerializer' describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { const reg = new DummyRegressor() @@ -55,7 +55,7 @@ describe('DummyRegressor', function () { reg.fit(X, y) expect(reg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) - it('Should save DummyRegressor', function () { + it('Should save DummyRegressor', async function () { const reg = new DummyRegressor({ strategy: 'constant', constant: 10 }) const X = [ @@ -73,10 +73,10 @@ describe('DummyRegressor', function () { reg.fit(X, y) - expect(saveResult).toEqual(JSON.parse(reg.toJson() as string)) + expect(saveResult).toEqual(await toObject(reg)) }) - it('Should load serialized DummyRegressor', function () { + it('Should load serialized DummyRegressor', async function () { const reg = new DummyRegressor({ strategy: 'constant', constant: 10 }) const X = [ @@ -92,8 +92,8 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = reg.toJson() as string - const newReg = new DummyRegressor().fromJson(saveReg) + const saveReg = await toObject(reg) + const newReg = await fromObject(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index fb58af50..c0969a80 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,6 +1,6 @@ import { tf } from '../shared/globals' import { SimpleImputer } from './SimpleImputer' - +import { toObject, fromObject } from '../simpleSerializer' describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { const imputer = new SimpleImputer({ strategy: 'constant', fillValue: 3 }) @@ -119,7 +119,7 @@ describe('SimpleImputer', function () { expect(returned.arraySync()).toEqual(expected) expect(imputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) }) - it('Should serialized Imputer', function () { + it('Should serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) const data = [ @@ -129,21 +129,21 @@ describe('SimpleImputer', function () { [4, 2], [6, NaN] ] - const expected = { name: 'SimpleImputer', - missingValues: null, + missingValues: NaN, + fillValue: undefined, strategy: 'mostFrequent', statistics: { - type: 'Tensor', + name: 'Tensor', value: [4, 3] } } - const returned = imputer.fitTransform(data) - expect(JSON.parse(imputer.toJson() as string)).toEqual(expected) + imputer.fitTransform(data) + expect(await toObject(imputer)).toEqual(expected) }) - it('Should load serialized Imputer', function () { + it('Should load serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) const data = [ @@ -162,8 +162,9 @@ describe('SimpleImputer', function () { [6, 3] ] - const returned = imputer.fitTransform(data) - const newImputer = new SimpleImputer().fromJson(imputer.toJson() as string) + imputer.fitTransform(data) + const thing = await toObject(imputer) + const newImputer = await fromObject(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) expect(newImputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) diff --git a/src/index.ts b/src/index.ts index 6ab8fb07..22917ec5 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,5 @@ +import Serialize from './serialize' + /** * @license * Copyright 2021, JsData. All rights reserved. @@ -83,3 +85,9 @@ export { DecisionTreeRegressor, DecisionTreeRegressorParams } from './tree/DecisionTree' + +export { fromObject, Serialize } from './simpleSerializer' + +export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' +export { Splitter } from './tree/Splitter' +export { DecisionTreeBase, DecisionTree } from './tree/DecisionTree' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index a4d67295..1753fa4c 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,7 +1,7 @@ import { LinearRegression } from './LinearRegression' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' - +import { toObject, fromObject } from '../simpleSerializer' function roughlyEqual(a: number, b: number, tol = 0.1) { return Math.abs(a - b) < tol } @@ -148,8 +148,10 @@ describe('LinearRegression', function () { const lr = new LinearRegression({ fitIntercept: false }) await lr.fit(mediumX, yPlusJitter) - const serialized = await lr.toJson() - const newModel = new LinearRegression({}).fromJson(serialized) + const serialized = await lr.toObject() + console.log({ serialized }) + const newModel = await fromObject(serialized) + console.log(newModel) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index c00db0fa..3dde416f 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,6 +1,6 @@ import { LogisticRegression } from './LogisticRegression' import { tf } from '../shared/globals' - +import { fromObject } from '../simpleSerializer' describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { const lr = new LogisticRegression() @@ -133,8 +133,8 @@ describe('LogisticRegression', function () { let logreg = new LogisticRegression({ penalty: 'l2' }) await logreg.fit(X, y) - const serializeModel = await logreg.toJson() - const newModel = logreg.fromJson(serializeModel) + const serializeModel = await logreg.toObject() + const newModel = await fromObject(serializeModel) const newModelResult = newModel.predict(Xtest) expect(newModelResult.arraySync()).toEqual([0, 0, 0, 0, 0, 0, 2, 2, 2]) diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts index b5900a39..e3579da6 100644 --- a/src/linear_model/SgdClassifier.ts +++ b/src/linear_model/SgdClassifier.ts @@ -413,4 +413,13 @@ export class SGDClassifier extends ClassifierMixin { public fromJson(model: string) { return fromJson(this, model) as this } + + // public async toObject(): Promise { + // let { toObject } = await import('../simpleSerializer') + // return await toObject(this, [ + // 'modelCompileArgs', + // 'modelFitArgs', + // 'denseLayerArgs' + // ]) + // } } diff --git a/src/linear_model/SgdRegressor.ts b/src/linear_model/SgdRegressor.ts index 917ac73c..d3bad4e4 100644 --- a/src/linear_model/SgdRegressor.ts +++ b/src/linear_model/SgdRegressor.ts @@ -21,6 +21,7 @@ import { import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { RegressorMixin } from '../mixins' import { fromJson, toJSON } from './modelSerializer' + /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can @@ -208,6 +209,15 @@ export class SGDRegressor extends RegressorMixin { return this } + public async toObject(): Promise { + let { toObject } = await import('../simpleSerializer') + return await toObject(this, [ + 'modelCompileArgs', + 'modelFitArgs', + 'denseLayerArgs' + ]) + } + /** * Similar to scikit-learn, this returns the object of configuration params for SGD * @returns {SGDRegressorParams} Returns an object of configuration params. diff --git a/src/mixins.ts b/src/mixins.ts index d826e455..38d9de8e 100644 --- a/src/mixins.ts +++ b/src/mixins.ts @@ -1,6 +1,7 @@ import { Scikit2D, Scikit1D } from './types' import { r2Score, accuracyScore } from './metrics/metrics' -import Serialize from './serialize' +// import Serialize from './serialize' +import { Serialize } from './simpleSerializer' import { tf } from './shared/globals' export class TransformerMixin extends Serialize { // We assume that fit and transform exist diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index 627018e8..abce7a22 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -13,6 +13,7 @@ * ========================================================================== */ import { GaussianNB } from './GaussianNB' +import { toObject, fromObject } from '../simpleSerializer' describe('GaussianNB', function () { it('without priors', async () => { @@ -101,8 +102,8 @@ describe('GaussianNB', function () { await model.fit(X, y) const labels = model.predict(X) - const serializeModel = model.toJson() - const newModel = new GaussianNB().fromJson(serializeModel) + const serializeModel = await toObject(model) + const newModel = await fromObject(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) }) diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 52ead335..7c10306b 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -4,6 +4,7 @@ import { tensorEqual } from '../utils' import { LinearRegression } from '../linear_model/LinearRegression' import { SimpleImputer } from '../impute/SimpleImputer' import { MinMaxScaler } from '../preprocessing/MinMaxScaler' +import { toObject, fromObject } from '../simpleSerializer' describe('Pipeline', function () { it('Use a Pipeline (min-max scaler, and linear regression)', async function () { @@ -96,8 +97,9 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = (await pipeline.toJson()) as string - const newPipeLine = new Pipeline().fromJson(saveModel) + const saveModel = await toObject(pipeline) + console.log(saveModel) + const newPipeLine = await fromObject(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) expect( diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index 7b15cc47..6e995b47 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -3,7 +3,7 @@ import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' import { tf } from '../shared/globals' - +import { toObject, fromObject } from '../simpleSerializer' export function convertTensorToInputType( tensor: tf.Tensor, inputData: ScikitVecOrMatrix @@ -161,12 +161,12 @@ describe('MinMaxscaler', function () { 0 ]) }) - it('Serialize and unserialize MinMaxScaler', function () { + it('Serialize and unserialize MinMaxScaler', async function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = scaler.toJson() as string - const newModel = new MinMaxScaler().fromJson(serial) + const serial = (await toObject(scaler)) as string + const newModel = await fromObject(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts new file mode 100644 index 00000000..ca8ee217 --- /dev/null +++ b/src/simpleSerializer.ts @@ -0,0 +1,223 @@ +import { tf } from './shared/globals' + +const EstimatorList = [ + 'KNeighborsRegressor', + 'LinearRegression', + 'LassoRegression', + 'RidgeRegression', + 'ElasticNet', + 'LogisticRegression', + 'DummyRegressor', + 'DummyClassifier', + 'MinMaxScaler', + 'StandardScaler', + 'MaxAbsScaler', + 'SimpleImputer', + 'OneHotEncoder', + 'LabelEncoder', + 'OrdinalEncoder', + 'Normalizer', + 'Pipeline', + 'ColumnTransformer', + 'RobustScaler', + 'KMeans', + 'VotingRegressor', + 'VotingClassifier', + 'LinearSVC', + 'LinearSVR', + 'GaussianNB', + 'DecisionTreeClassifier', + 'DecisionTreeRegressor', + 'ClassificationCriterion', + 'RegressionCriterion', + 'Splitter', + 'DecisionTreeBase', + 'DecisionTree' +] + +/** + * 1. Make a list called EstimatorList + * 2. Do a dynamic import here + */ + +class JSONHandler { + savedArtifacts: any + constructor(artifacts?: any) { + this.savedArtifacts = artifacts || null + } + + async save(artifacts: any) { + // Base 64 encoding + this.savedArtifacts = artifacts + return { + modelArtifactsInfo: { + dateSaved: new Date(), + modelTopologyType: 'JSON', + modelTopologyBytes: JSON.stringify(artifacts.modelTopology).length, + weightSpecsBytes: JSON.stringify(artifacts.weightSpecs).length, + weightDataBytes: artifacts.weightData.byteLength + } + } + } + + async load() { + // Base64 decode + return this.savedArtifacts + } +} + +export async function toObjectInner( + val: any, + ignoreKeys: string[] = [] +): Promise { + // console.log(val) + if (['number', 'string', 'undefined', 'boolean'].includes(typeof val)) { + return val + } + + if (typeof val === 'function') { + console.warn( + `warning: Serializing function ${val}. Not going to be able to deserialize this later.` + ) + if (val.name) { + return val.name + } + } + + if (typeof val === 'object') { + // Null case + if (val === null) { + return null + } + // Array case + if (Array.isArray(val)) { + return await Promise.all(val.map(async (el) => await toObjectInner(el))) + } + + // Serialize a Tensor + if (val instanceof tf.Tensor) { + return { + name: 'Tensor', + value: val.arraySync() + } + } + + // Int32Array serialization. Used for DecisionTrees + if (val instanceof Int32Array) { + return { + name: 'Int32Array', + value: Array.from(val) + } + } + + // The tf object + if (val.ENV && val.AdadeltaOptimizer && val.version) { + return { + name: 'TF', + version: val.version.tfjs + } + } + + // tf.layers model + if (val instanceof tf.Sequential) { + let mem = new JSONHandler() + await val.save(mem as any) + return { + name: 'Sequential', + artifacts: mem.savedArtifacts + } + } + + if (EstimatorList.includes(val.name)) { + if (val.toObject) { + return val.toObject() + } + } + + // Generic object case / class case + let response: any = {} + for (let key of Object.keys(val)) { + // Ignore all the keys that we choose to + if (ignoreKeys.includes(key)) { + continue + } + // Ignore any function when we serialize + // if (typeof val[key] === 'function') { + // continue + // } + response[key] = await toObjectInner(val[key]) + } + return response + } +} + +export async function fromObjectInner(val: any): Promise { + // Ignores all types that aren't objects + if (typeof val !== 'object') { + return val + } + + // Null case + if (val === null) { + return null + } + + // Make a Tensor + if (val.name === 'Tensor') { + return tf.tensor(val.value) + } + + if (val.name === 'Sequential') { + let newMem = new JSONHandler(val.artifacts) + return await tf.loadLayersModel(newMem as any) + } + + if (val.name === 'Int32Array') { + return new Int32Array(val.value) + } + + // Array case + if (Array.isArray(val)) { + return await Promise.all(val.map(async (el) => await fromObjectInner(el))) + } + + // Generic object case + for (let key of Object.keys(val)) { + val[key] = await fromObjectInner(val[key]) + } + + // Make a model + if (EstimatorList.includes(val.name)) { + // Do dynamic import to avoid circular dependency tree + // Every class extends this class and therefor it + // can't import those classes in here + let module = await import('./index') + let model = (module as any)[val.name] + + let resultObj = new model(val) + for (let key of Object.keys(val)) { + resultObj[key] = val[key] + } + return resultObj + } + + return val +} + +export async function fromObject(val: any): Promise { + try { + return await fromObjectInner(val) + } catch (e) { + console.error(e) + } +} + +export class Serialize { + async toObject(ignoreKeys: string[] = []): Promise { + try { + return await toObjectInner(this, ignoreKeys) + } catch (e) { + console.error(e) + } + } +} diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index c58617fc..e2ed5a32 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' - +import { toObject, fromObject } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -15,7 +15,7 @@ describe('Criterion', function () { sampleMap[i] = i } it('Use the criterion (init)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) expect(criterion.start).toEqual(0) @@ -29,7 +29,7 @@ describe('Criterion', function () { expect(criterion.labelFreqsRight[1]).toEqual(0) }, 1000) it('Use the criterion (update)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) criterion.update(3, sampleMap) @@ -40,20 +40,23 @@ describe('Criterion', function () { expect(criterion.labelFreqsRight[1]).toEqual(3) }, 1000) it('Use the criterion (gini)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) expect(criterion.nodeImpurity()).toEqual(0.5) }, 1000) it('Use the criterion (entropy)', async function () { - let criterion = new ClassificationCriterion('entropy', y) + let criterion = new ClassificationCriterion({ + impurityMeasure: 'entropy', + y + }) criterion.init(0, 6, sampleMap) expect(criterion.nodeImpurity()).toEqual(1) }, 1000) it('Use the criterion (gini update)', async function () { - let criterion = new ClassificationCriterion('gini', y) + let criterion = new ClassificationCriterion({ impurityMeasure: 'gini', y }) criterion.init(0, 6, sampleMap) criterion.update(4, sampleMap) @@ -75,10 +78,13 @@ describe('Criterion', function () { expect(entropy(labelFreqs, nSamples)).toEqual(0.7219280948873623) }, 1000) it('Use the criterion (entropy)', async function () { - let criterion = new ClassificationCriterion('entropy', y) + let criterion = new ClassificationCriterion({ + impurityMeasure: 'entropy', + y + }) criterion.init(0, 6, sampleMap) - const serial = criterion.toJson() as string - const newCriterion = ClassificationCriterion.fromJson(serial) + const serial = await toObject(criterion) + const newCriterion = await fromObject(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) }) diff --git a/src/tree/Criterion.ts b/src/tree/Criterion.ts index d7fd6cae..3f57c2de 100644 --- a/src/tree/Criterion.ts +++ b/src/tree/Criterion.ts @@ -43,7 +43,7 @@ function arrayMax(labels: int[]) { export class ClassificationCriterion extends Serialize { y: int[] impurityMeasure: ImpurityMeasure - impurityFunc: (labelFreqs: int[], nSamples: int) => number + // impurityFunc: (labelFreqs: int[], nSamples: int) => number start: int = 0 end: int = 0 pos: int = 0 @@ -54,21 +54,19 @@ export class ClassificationCriterion extends Serialize { nSamples: int = 0 nSamplesLeft: int = 0 nSamplesRight: int = 0 - name = 'classificationCriterion' - - constructor(impurityMeasure: ImpurityMeasure, y: number[]) { + name = 'ClassificationCriterion' + + constructor({ + impurityMeasure, + y + }: { + impurityMeasure: ImpurityMeasure + y: number[] + }) { super() - assert( - ['gini', 'entropy'].includes(impurityMeasure), - 'Unkown impurity measure. Only supports gini, and entropy' - ) this.impurityMeasure = impurityMeasure - if (this.impurityMeasure === 'gini') { - this.impurityFunc = giniCoefficient - } else { - this.impurityFunc = entropy - } + // This assumes that the labels are 0,1,2,...,(n-1) this.nLabels = arrayMax(y) + 1 this.y = y @@ -116,12 +114,12 @@ export class ClassificationCriterion extends Serialize { } childrenImpurities() { + let impurityFunc = + this.impurityMeasure === 'gini' ? giniCoefficient : entropy + return { - impurityLeft: this.impurityFunc(this.labelFreqsLeft, this.nSamplesLeft), - impurityRight: this.impurityFunc( - this.labelFreqsRight, - this.nSamplesRight - ) + impurityLeft: impurityFunc(this.labelFreqsLeft, this.nSamplesLeft), + impurityRight: impurityFunc(this.labelFreqsRight, this.nSamplesRight) } } @@ -134,7 +132,10 @@ export class ClassificationCriterion extends Serialize { } nodeImpurity() { - return this.impurityFunc(this.labelFreqsTotal, this.nSamples) + let impurityFunc = + this.impurityMeasure === 'gini' ? giniCoefficient : entropy + + return impurityFunc(this.labelFreqsTotal, this.nSamples) } nodeValue() { @@ -143,10 +144,10 @@ export class ClassificationCriterion extends Serialize { static fromJson(model: string) { const jsonClass = JSON.parse(model) - const newModel = new ClassificationCriterion( - jsonClass.impurityMeasure, - jsonClass.y - ) + const newModel = new ClassificationCriterion({ + impurityMeasure: jsonClass.impurityMeasure, + y: jsonClass.y + }) return Object.assign(newModel, jsonClass) } } @@ -154,7 +155,7 @@ export class ClassificationCriterion extends Serialize { export class RegressionCriterion extends Serialize { y: number[] impurityMeasure: 'squared_error' - impurityFunc: (ySquaredSum: number, ySum: number, nSamples: int) => number + // impurityFunc: (ySquaredSum: number, ySum: number, nSamples: int) => number start: int = 0 end: int = 0 pos: int = 0 @@ -167,18 +168,23 @@ export class RegressionCriterion extends Serialize { nSamples: int = 0 nSamplesLeft: int = 0 nSamplesRight: int = 0 - name = 'regressionCriterion' - - constructor(impurityMeasure: 'squared_error', y: number[]) { + name = 'RegressionCriterion' + + constructor({ + impurityMeasure, + y + }: { + impurityMeasure: 'squared_error' + y: number[] + }) { super() - assert( - ['squared_error'].includes(impurityMeasure), - 'Unkown impurity measure. Only supports squared_error' - ) + + // We don't assert in the constructor, we assert in fit in accordance with the sklearn docs // Support MAE one day this.impurityMeasure = impurityMeasure - this.impurityFunc = mse + // We don't set the impurityFunc here because we need it to be serializable as an object + // this.impurityFunc = mse this.y = y } @@ -224,13 +230,15 @@ export class RegressionCriterion extends Serialize { } childrenImpurities() { + // once we get another impurity function we can do a ternary here + let impurityFunc = mse return { - impurityLeft: this.impurityFunc( + impurityLeft: impurityFunc( this.squaredSumLeft, this.sumTotalLeft, this.nSamplesLeft ), - impurityRight: this.impurityFunc( + impurityRight: impurityFunc( this.squaredSumRight, this.sumTotalRight, this.nSamplesRight @@ -247,7 +255,9 @@ export class RegressionCriterion extends Serialize { } nodeImpurity() { - return this.impurityFunc(this.squaredSum, this.sumTotal, this.nSamples) + // once we get another impurity function we can do a ternary here + let impurityFunc = mse + return impurityFunc(this.squaredSum, this.sumTotal, this.nSamples) } nodeValue() { @@ -256,10 +266,10 @@ export class RegressionCriterion extends Serialize { static fromJson(model: string) { const jsonClass = JSON.parse(model) - const newModel = new RegressionCriterion( - jsonClass.impurityMeasure, - jsonClass.y - ) + const newModel = new RegressionCriterion({ + impurityMeasure: jsonClass.impurityMeasure, + y: jsonClass.y + }) return Object.assign(newModel, jsonClass) } } diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index 7e1a5207..d2fc7728 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,6 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' +import { toObject, fromObject } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -620,8 +621,8 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = tree_classifier.toJson() - const newTree = new DecisionTreeClassifier().fromJson(serial) + const serial = await toObject(tree_classifier) + const newTree = await fromObject(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) }) diff --git a/src/tree/DecisionTree.ts b/src/tree/DecisionTree.ts index 292dca08..a4659451 100644 --- a/src/tree/DecisionTree.ts +++ b/src/tree/DecisionTree.ts @@ -48,9 +48,10 @@ function argMax(array: number[]) { return array.map((x, i) => [x, i]).reduce((r, a) => (a[0] > r[0] ? a : r))[1] } -class DecisionTree { +export class DecisionTree { nodes: Node[] = [] isBuilt = false + name = 'DecisionTree' getLeafNodes(X: number[][]): int[] { let leafNodeIds: int[] = [] @@ -140,7 +141,7 @@ interface DecisionTreeBaseParams { minImpurityDecrease?: number } -class DecisionTreeBase extends Serialize { +export class DecisionTreeBase extends Serialize { splitter!: Splitter stack: NodeRecord[] = [] minSamplesLeaf: int @@ -173,6 +174,7 @@ class DecisionTreeBase extends Serialize { this.minImpurityDecrease = minImpurityDecrease this.maxFeaturesNumb = 0 this.tree = new DecisionTree() + this.name = 'DecisionTreeBase' } calcMaxFeatures( nFeatures: int, @@ -203,14 +205,14 @@ class DecisionTreeBase extends Serialize { // CheckNegativeLabels(yptr); this.maxFeaturesNumb = this.calcMaxFeatures(X[0].length, this.maxFeatures) - this.splitter = new Splitter( + this.splitter = new Splitter({ X, y, - this.minSamplesLeaf, - this.criterion, - this.maxFeaturesNumb, - newSamplesSubset - ) + minSamplesLeaf: this.minSamplesLeaf, + impurityMeasure: this.criterion, + maxFeatures: this.maxFeaturesNumb, + samplesSubset: newSamplesSubset + }) // put root node on stack let rootNode: NodeRecord = { diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index ff7d7a6c..a8b164f1 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,5 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' +import { toObject, fromObject } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -8,7 +9,14 @@ describe('Splitter', function () { let y = [0, 0, 0, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(0) @@ -21,7 +29,14 @@ describe('Splitter', function () { let y = [1, 1, 0, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(0) @@ -34,7 +49,14 @@ describe('Splitter', function () { let y = [1, 0, 1, 1, 1, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.threshold).toEqual(-0.5) expect(bestSplit.feature).toEqual(0) @@ -47,7 +69,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 1, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(false) expect(bestSplit.threshold).toEqual(0) @@ -60,7 +89,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 2, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 4, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 4, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(0) @@ -73,7 +109,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 2, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 4, type as ImpurityMeasure, 1, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 4, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 1, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(0) @@ -95,7 +138,14 @@ describe('Splitter', function () { let y = [1, 1, 1, 1, 2, 2, 2, 2] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 20, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 20, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(1) @@ -117,7 +167,14 @@ describe('Splitter', function () { let y = [2, 1, 1, 2, 1, 2, 2, 1] types.forEach((type) => { - let splitter = new Splitter(X, y, 1, type as ImpurityMeasure, 20, []) + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: type as ImpurityMeasure, + maxFeatures: 20, + samplesSubset: [] + }) let bestSplit = splitter.splitNode() expect(bestSplit.foundSplit).toEqual(true) expect(bestSplit.feature).toEqual(1) @@ -137,11 +194,18 @@ describe('Splitter', function () { [0, 1] ] let y = [2, 1, 1, 2, 1, 2, 2, 1] - let splitter = new Splitter(X, y, 1, 'gini', 20, []) - let bestSplit = splitter.splitNode() - const serial = splitter.toJson() - const newSpliter = Splitter.fromJson(serial) - const newBestSplitter = newSpliter.splitNode() + let splitter = new Splitter({ + X, + y, + minSamplesLeaf: 1, + impurityMeasure: 'gini', + maxFeatures: 20, + samplesSubset: [] + }) + splitter.splitNode() + const serial = await toObject(splitter) + const newSplitter = await fromObject(serial) + const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) expect(newBestSplitter.feature).toEqual(1) expect(newBestSplitter.threshold).toEqual(2.5) diff --git a/src/tree/Splitter.ts b/src/tree/Splitter.ts index ce408d2a..c53750a9 100644 --- a/src/tree/Splitter.ts +++ b/src/tree/Splitter.ts @@ -41,16 +41,23 @@ export class Splitter extends Serialize { sampleMap: Int32Array nSamplesTotal: int nFeatures: int - name = 'splitter' - - constructor( - X: number[][], - y: int[], - minSamplesLeaf: int, - impurityMeasure: ImpurityMeasure, - maxFeatures: int, - samplesSubset: int[] = [] - ) { + name = 'Splitter' + + constructor({ + X, + y, + minSamplesLeaf, + impurityMeasure, + maxFeatures, + samplesSubset = [] + }: { + X: number[][] + y: int[] + minSamplesLeaf: int + impurityMeasure: ImpurityMeasure + maxFeatures: int + samplesSubset: int[] + }) { super() this.X = X this.y = y @@ -74,9 +81,9 @@ export class Splitter extends Serialize { } } if (impurityMeasure === 'squared_error') { - this.criterion = new RegressionCriterion(impurityMeasure, y) + this.criterion = new RegressionCriterion({ impurityMeasure, y }) } else { - this.criterion = new ClassificationCriterion(impurityMeasure, y) + this.criterion = new ClassificationCriterion({ impurityMeasure, y }) } this.featureOrder = [] for (let i = 0; i < this.nFeatures; i++) { @@ -237,13 +244,14 @@ export class Splitter extends Serialize { jsonClass.sampleMap = new Int32Array(jsonClass.sampleMap) } - const splitter = new Splitter( - jsonClass.X, - jsonClass.y, - jsonClass.minSamplesLeaf, - 'squared_error', - jsonClass.samplesSubset - ) + const splitter = new Splitter({ + X: jsonClass.X, + y: jsonClass.y, + minSamplesLeaf: jsonClass.minSamplesLeaf, + impurityMeasure: 'squared_error', + maxFeatures: jsonClass.maxFeatures, + samplesSubset: jsonClass.samplesSubset + }) return Object.assign(splitter, jsonClass) as Splitter } From ec713230efbf07ac228c0291c21f5fcc0b1bd995 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sat, 7 May 2022 20:59:06 -0700 Subject: [PATCH 2/4] feat: updated serialization --- src/cluster/KMeans.test.ts | 3 +- src/compose/ColumnTransformer.test.ts | 31 ++++++- src/compose/ColumnTransformer.ts | 1 + src/dummy/DummyClassifier.test.ts | 3 +- src/dummy/DummyRegressor.test.ts | 11 +-- src/ensemble/VotingClassifier.test.ts | 15 ++-- src/ensemble/VotingClassifier.ts | 10 --- src/ensemble/VotingRegressor.test.ts | 14 ++-- src/ensemble/VotingRegressor.ts | 10 --- src/ensemble/serializeEnsemble.ts | 90 -------------------- src/impute/SimpleImputer.test.ts | 8 +- src/index.ts | 7 +- src/linear_model/LinearRegression.test.ts | 5 +- src/linear_model/LogisticRegression.test.ts | 3 +- src/linear_model/SgdClassifier.ts | 20 +---- src/linear_model/SgdRegressor.ts | 19 ----- src/linear_model/modelSerializer.ts | 91 --------------------- src/mixins.ts | 1 - src/naive_bayes/BaseNaiveBayes.ts | 35 +------- src/naive_bayes/GaussianNB.test.ts | 7 +- src/neighbors/KNeighborsBase.ts | 2 +- src/neighbors/KNeighborsRegressor.test.ts | 4 +- src/pipeline/Pipeline.test.ts | 16 ++-- src/pipeline/Pipeline.ts | 12 +-- src/preprocessing/LabelEncoder.test.ts | 2 +- src/preprocessing/LabelEncoder.ts | 2 +- src/preprocessing/MaxAbsScaler.test.ts | 16 +++- src/preprocessing/MinMaxScaler.test.ts | 5 +- src/preprocessing/Normalizer.test.ts | 2 +- src/preprocessing/OneHotEncoder.test.ts | 2 +- src/preprocessing/OrdinalEncoder.test.ts | 2 +- src/preprocessing/RobustScaler.test.ts | 2 +- src/preprocessing/StandardScaler.test.ts | 2 +- src/serialize.ts | 50 ----------- src/simpleSerializer.ts | 22 ++--- src/tree/Criterion.test.ts | 4 +- src/tree/Criterion.ts | 3 +- src/tree/DecisionTree.test.ts | 4 +- src/tree/DecisionTree.ts | 34 +------- src/tree/Splitter.test.ts | 4 +- src/tree/Splitter.ts | 42 +--------- 41 files changed, 128 insertions(+), 488 deletions(-) delete mode 100644 src/ensemble/serializeEnsemble.ts delete mode 100644 src/linear_model/modelSerializer.ts delete mode 100644 src/serialize.ts diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index ab7f65ca..ac737fc5 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,5 +1,4 @@ -import { KMeans } from './KMeans' -import { fromObject } from '../index' +import { fromObject, KMeans } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ diff --git a/src/compose/ColumnTransformer.test.ts b/src/compose/ColumnTransformer.test.ts index 063da473..9dd79e1b 100644 --- a/src/compose/ColumnTransformer.test.ts +++ b/src/compose/ColumnTransformer.test.ts @@ -1,6 +1,9 @@ -import { ColumnTransformer } from './ColumnTransformer' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' -import { SimpleImputer } from '../impute/SimpleImputer' +import { + fromObject, + SimpleImputer, + MinMaxScaler, + ColumnTransformer +} from '../index' import * as dfd from 'danfojs-node' describe('ColumnTransformer', function () { @@ -30,4 +33,26 @@ describe('ColumnTransformer', function () { expect(result.arraySync()).toEqual(expected) }) + it('ColumnTransformer serialize/deserialize test', async function () { + const X = [ + [2, 2], // [1, .5] + [2, 3], // [1, .75] + [0, NaN], // [0, 1] + [2, 0] // [.5, 0] + ] + let newDf = new dfd.DataFrame(X) + + const transformer = new ColumnTransformer({ + transformers: [ + ['minmax', new MinMaxScaler(), [0]], + ['simpleImpute', new SimpleImputer({ strategy: 'median' }), [1]] + ] + }) + + transformer.fitTransform(newDf) + let obj = await transformer.toObject() + let myResult = await fromObject(obj) + + expect(myResult.transformers.length).toEqual(2) + }) }) diff --git a/src/compose/ColumnTransformer.ts b/src/compose/ColumnTransformer.ts index 7109b65b..096337c1 100644 --- a/src/compose/ColumnTransformer.ts +++ b/src/compose/ColumnTransformer.ts @@ -76,6 +76,7 @@ export class ColumnTransformer extends Serialize { transformers = [], remainder = 'drop' }: ColumnTransformerParams = {}) { + super() this.transformers = transformers this.remainder = remainder } diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 419a986b..9905da94 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,5 +1,4 @@ -import { DummyClassifier } from './DummyClassifier' -import { fromObject } from '../simpleSerializer' +import { DummyClassifier, fromObject } from '../index' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index ebbe652d..6cc01a50 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,5 +1,5 @@ -import { DummyRegressor } from './DummyRegressor' -import { toObject, fromObject } from '../simpleSerializer' +import { DummyRegressor, fromObject } from '../index' + describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { const reg = new DummyRegressor() @@ -68,12 +68,13 @@ describe('DummyRegressor', function () { name: 'DummyRegressor', EstimatorType: 'regressor', strategy: 'constant', - constant: 10 + constant: 10, + quantile: undefined } reg.fit(X, y) - expect(saveResult).toEqual(await toObject(reg)) + expect(saveResult).toEqual(await reg.toObject()) }) it('Should load serialized DummyRegressor', async function () { @@ -92,7 +93,7 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = await toObject(reg) + const saveReg = await reg.toObject() const newReg = await fromObject(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) diff --git a/src/ensemble/VotingClassifier.test.ts b/src/ensemble/VotingClassifier.test.ts index d741c00a..60f43da7 100644 --- a/src/ensemble/VotingClassifier.test.ts +++ b/src/ensemble/VotingClassifier.test.ts @@ -1,7 +1,10 @@ -import { makeVotingClassifier, VotingClassifier } from './VotingClassifier' -import { DummyClassifier } from '../dummy/DummyClassifier' - -import { LogisticRegression } from '../linear_model/LogisticRegression' +import { + makeVotingClassifier, + VotingClassifier, + DummyClassifier, + LogisticRegression, + fromObject +} from '../index' describe('VotingClassifier', function () { it('Use VotingClassifier on simple example (voting = hard)', async function () { @@ -118,8 +121,8 @@ describe('VotingClassifier', function () { await voter.fit(X, y) - const savedModel = (await voter.toJson()) as string - const newModel = new VotingClassifier({}).fromJson(savedModel) + const savedModel = await voter.toObject() + const newModel = await fromObject(savedModel) expect(newModel.predict(X).arraySync()).toEqual([1, 1, 1, 1, 1]) }, 30000) diff --git a/src/ensemble/VotingClassifier.ts b/src/ensemble/VotingClassifier.ts index 5db1241e..ce93c91a 100644 --- a/src/ensemble/VotingClassifier.ts +++ b/src/ensemble/VotingClassifier.ts @@ -2,7 +2,6 @@ import { Scikit1D, Scikit2D } from '../types' import { tf } from '../shared/globals' import { ClassifierMixin } from '../mixins' import { LabelEncoder } from '../preprocessing/LabelEncoder' -import { fromJson, toJson } from './serializeEnsemble' /* Next steps: @@ -154,15 +153,6 @@ export class VotingClassifier extends ClassifierMixin { ): Promise | Array> { return (await this.fit(X, y)).transform(X) } - - public fromJson(model: string) { - return fromJson(this, model) - } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } } export function makeVotingClassifier(...args: any[]) { diff --git a/src/ensemble/VotingRegressor.test.ts b/src/ensemble/VotingRegressor.test.ts index 06a69f97..e0060196 100644 --- a/src/ensemble/VotingRegressor.test.ts +++ b/src/ensemble/VotingRegressor.test.ts @@ -1,6 +1,10 @@ -import { makeVotingRegressor, VotingRegressor } from './VotingRegressor' -import { DummyRegressor } from '../dummy/DummyRegressor' -import { LinearRegression } from '../linear_model/LinearRegression' +import { + makeVotingRegressor, + VotingRegressor, + fromObject, + DummyRegressor, + LinearRegression +} from '../index' describe('VotingRegressor', function () { it('Use VotingRegressor on simple example ', async function () { @@ -51,8 +55,8 @@ describe('VotingRegressor', function () { await voter.fit(X, y) - const savedModel = (await voter.toJson()) as string - const newModel = new VotingRegressor({}).fromJson(savedModel) + const savedModel = await voter.toObject() + const newModel = await fromObject(savedModel) expect(newModel.score(X, y)).toEqual(voter.score(X, y)) }, 30000) }) diff --git a/src/ensemble/VotingRegressor.ts b/src/ensemble/VotingRegressor.ts index db3d4973..41396e4d 100644 --- a/src/ensemble/VotingRegressor.ts +++ b/src/ensemble/VotingRegressor.ts @@ -1,7 +1,6 @@ import { Scikit1D, Scikit2D } from '../types' import { tf } from '../shared/globals' import { RegressorMixin } from '../mixins' -import { fromJson, toJson } from './serializeEnsemble' /* Next steps: 0. Write validation code to check Estimator inputs @@ -95,15 +94,6 @@ export class VotingRegressor extends RegressorMixin { public async fitTransform(X: Scikit2D, y: Scikit1D) { return (await this.fit(X, y)).transform(X) } - - public fromJson(model: string) { - return fromJson(this, model) as this - } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } } /** diff --git a/src/ensemble/serializeEnsemble.ts b/src/ensemble/serializeEnsemble.ts deleted file mode 100644 index 245c89df..00000000 --- a/src/ensemble/serializeEnsemble.ts +++ /dev/null @@ -1,90 +0,0 @@ -import { DummyClassifier } from '../dummy/DummyClassifier' -import { DummyRegressor } from '../dummy/DummyRegressor' -import { LogisticRegression } from '../linear_model/LogisticRegression' -import { RidgeRegression } from '../linear_model/RidgeRegression' -import { LinearRegression } from '../linear_model/LinearRegression' -import { LassoRegression } from '../linear_model/LassoRegression' -import { ElasticNet } from '../linear_model/ElasticNet' -import { LabelEncoder } from '../preprocessing/LabelEncoder' -import { SimpleImputer } from '../impute/SimpleImputer' -import { tf } from '../shared/globals' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' - -function getEstimator(name: string, serialJson: string) { - switch (name) { - case 'DummyClassifier': - return new DummyClassifier().fromJson(serialJson) - case 'DummyRegressor': - return new DummyRegressor().fromJson(serialJson) - case 'LogisticRegression': - return new LogisticRegression().fromJson(serialJson) - case 'RidgeRegression': - return new RidgeRegression().fromJson(serialJson) - case 'LinearRegression': - return new LinearRegression().fromJson(serialJson) - case 'LassoRegression': - return new LassoRegression().fromJson(serialJson) - case 'ElasticNet': - return new ElasticNet().fromJson(serialJson) - case 'SimpleImputer': - return new SimpleImputer().fromJson(serialJson) - case 'MinMaxScaler': - return new MinMaxScaler().fromJson(serialJson) - default: - throw new Error(`${name} estimator not supported`) - } -} - -export function fromJson(classConstructor: any, model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != classConstructor.name) { - throw new Error( - `wrong json values for ${classConstructor.name} constructor` - ) - } - - const copyThis: any = Object.assign({}, classConstructor) - for (let key of Object.keys(classConstructor)) { - let value = copyThis[key] - if (value instanceof tf.Tensor) { - jsonClass[key] = tf.tensor(jsonClass[key]) - } - } - // for ensembles - if (jsonClass.estimators || jsonClass.steps) { - const jsonEstimatorOrStep = jsonClass.estimators || jsonClass.steps - for (let i = 0; i < jsonEstimatorOrStep.length; i++) { - const estimatorName = JSON.parse(jsonEstimatorOrStep[i][1]).name - const estimators = getEstimator(estimatorName, jsonEstimatorOrStep[i][1]) - jsonEstimatorOrStep[i][1] = Object.assign( - estimators, - jsonEstimatorOrStep[i][1] - ) - } - } - - if (jsonClass.le) { - const labelEncode = new LabelEncoder() - jsonClass.le = Object.assign(labelEncode, jsonClass.le) - } - return Object.assign(classConstructor, jsonClass) -} - -export async function toJson(classConstructor: any, classJson: any) { - let i = 0 - if (classConstructor.estimators) { - for (const estimator of classConstructor.estimators) { - classJson.estimators[i][1] = await estimator[1].toJson() - i += 1 - } - } - - if (classConstructor.steps) { - for (const step of classConstructor.steps) { - classJson.steps[i][1] = await step[1].toJson() - i += 1 - } - } - - return JSON.stringify(classJson) -} diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index c0969a80..2bad5c7b 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,6 +1,6 @@ import { tf } from '../shared/globals' -import { SimpleImputer } from './SimpleImputer' -import { toObject, fromObject } from '../simpleSerializer' +import { SimpleImputer, fromObject } from '../index' + describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { const imputer = new SimpleImputer({ strategy: 'constant', fillValue: 3 }) @@ -141,7 +141,7 @@ describe('SimpleImputer', function () { } imputer.fitTransform(data) - expect(await toObject(imputer)).toEqual(expected) + expect(await imputer.toObject()).toEqual(expected) }) it('Should load serialized Imputer', async function () { const imputer = new SimpleImputer({ strategy: 'mostFrequent' }) @@ -163,7 +163,7 @@ describe('SimpleImputer', function () { ] imputer.fitTransform(data) - const thing = await toObject(imputer) + const thing = await imputer.toObject() const newImputer = await fromObject(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) diff --git a/src/index.ts b/src/index.ts index 22917ec5..532f8be3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,3 @@ -import Serialize from './serialize' - /** * @license * Copyright 2021, JsData. All rights reserved. @@ -15,6 +13,7 @@ import Serialize from './serialize' * ========================================================================== */ export { KNeighborsRegressor } from './neighbors/KNeighborsRegressor' +export { KNeighborsClassifier } from './neighbors/KNeighborsClassifier' export { LinearRegression, LinearRegressionParams @@ -85,7 +84,9 @@ export { DecisionTreeRegressor, DecisionTreeRegressorParams } from './tree/DecisionTree' - +export { KFold } from './model_selection/KFold' +export { trainTestSplit } from './model_selection/trainTestSplit' +export { crossValScore } from './model_selection/crossValScore' export { fromObject, Serialize } from './simpleSerializer' export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index 1753fa4c..d4ca20a6 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,7 +1,6 @@ -import { LinearRegression } from './LinearRegression' +import { LinearRegression, fromObject } from '../index' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' -import { toObject, fromObject } from '../simpleSerializer' function roughlyEqual(a: number, b: number, tol = 0.1) { return Math.abs(a - b) < tol } @@ -149,9 +148,7 @@ describe('LinearRegression', function () { await lr.fit(mediumX, yPlusJitter) const serialized = await lr.toObject() - console.log({ serialized }) const newModel = await fromObject(serialized) - console.log(newModel) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index 3dde416f..03243db3 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,6 +1,5 @@ -import { LogisticRegression } from './LogisticRegression' +import { LogisticRegression, fromObject } from '../index' import { tf } from '../shared/globals' -import { fromObject } from '../simpleSerializer' describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { const lr = new LogisticRegression() diff --git a/src/linear_model/SgdClassifier.ts b/src/linear_model/SgdClassifier.ts index e3579da6..06f3f539 100644 --- a/src/linear_model/SgdClassifier.ts +++ b/src/linear_model/SgdClassifier.ts @@ -20,7 +20,7 @@ import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { OneHotEncoder } from '../preprocessing/OneHotEncoder' import { assert } from '../typesUtils' import { ClassifierMixin } from '../mixins' -import { fromJson, toJSON } from './modelSerializer' + /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. * With this base class and different error functions / regularizers we can @@ -404,22 +404,4 @@ export class SGDClassifier extends ClassifierMixin { private getModelWeight(): Promise> { return Promise.all(this.model.getWeights().map((weight) => weight.array())) } - - public async toJson(): Promise { - const classifierJson = JSON.parse(super.toJson() as string) - return toJSON(this, classifierJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } - - // public async toObject(): Promise { - // let { toObject } = await import('../simpleSerializer') - // return await toObject(this, [ - // 'modelCompileArgs', - // 'modelFitArgs', - // 'denseLayerArgs' - // ]) - // } } diff --git a/src/linear_model/SgdRegressor.ts b/src/linear_model/SgdRegressor.ts index d3bad4e4..e178889f 100644 --- a/src/linear_model/SgdRegressor.ts +++ b/src/linear_model/SgdRegressor.ts @@ -20,7 +20,6 @@ import { } from '../utils' import { Scikit2D, Scikit1D, OptimizerTypes, LossTypes } from '../types' import { RegressorMixin } from '../mixins' -import { fromJson, toJSON } from './modelSerializer' /** * SGD is a thin Wrapper around Tensorflow's model api with a single dense layer. @@ -209,15 +208,6 @@ export class SGDRegressor extends RegressorMixin { return this } - public async toObject(): Promise { - let { toObject } = await import('../simpleSerializer') - return await toObject(this, [ - 'modelCompileArgs', - 'modelFitArgs', - 'denseLayerArgs' - ]) - } - /** * Similar to scikit-learn, this returns the object of configuration params for SGD * @returns {SGDRegressorParams} Returns an object of configuration params. @@ -390,13 +380,4 @@ export class SGDRegressor extends RegressorMixin { return intercept } - - public async toJson(): Promise { - const classifierJson = JSON.parse(super.toJson() as string) - return toJSON(this, classifierJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } diff --git a/src/linear_model/modelSerializer.ts b/src/linear_model/modelSerializer.ts deleted file mode 100644 index aac0aeba..00000000 --- a/src/linear_model/modelSerializer.ts +++ /dev/null @@ -1,91 +0,0 @@ -import { optimizer, initializer, getLoss } from '../utils' -import { tf } from '../shared/globals' -import { OneHotEncoder } from '../preprocessing/OneHotEncoder' - -function getModelWeight( - model: tf.Sequential -): Promise> { - return Promise.all(model.getWeights().map((weight) => weight.array())) -} - -export async function toJSON( - classConstructor: any, - classifierJson: any -): Promise { - const modelConfig = classConstructor.model.getConfig() - const modelWeight = await getModelWeight(classConstructor.model) - classifierJson.model = { - config: modelConfig, - weight: modelWeight - } - - if (classConstructor.denseLayerArgs.kernelInitializer) { - const initializerName = - classConstructor.denseLayerArgs.kernelInitializer.constructor.name - classifierJson.denseLayerArgs.kernelInitializer = initializerName - } - if (classConstructor.denseLayerArgs.biasInitializer) { - const biasName = - classConstructor.denseLayerArgs.biasInitializer.constructor.name - classifierJson.denseLayerArgs.biasInitializer = biasName - } - // set optimizer - classifierJson.modelCompileArgs.optimizer = - classConstructor.model.optimizer.getConfig() - return JSON.stringify(classifierJson) -} - -export function fromJson(classConstructor: any, model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != classConstructor.name) { - throw new Error( - `wrong json values for ${classConstructor.name} constructor` - ) - } - - const jsonModel = tf.Sequential.fromConfig( - tf.Sequential, - jsonClass.model.config - ) as tf.Sequential - const jsonOpt = optimizer(jsonClass.optimizerType) - const optim = Object.assign(jsonOpt, jsonClass.modelCompileArgs.optimizer) - const loss = getLoss(jsonClass.lossType) - jsonClass.modelCompileArgs = { - ...jsonClass.modelCompileArgs, - optimizer: optim, - loss: loss - } - - jsonModel.compile(jsonClass.modelCompileArgs) - const weights = [] - for (const weight of jsonClass.model.weight) { - weights.push(tf.tensor(weight)) - } - jsonModel.setWeights(weights) - jsonClass.model = jsonModel - - // if call back create callback - // default usecase is set to EarlyStop - // might get complex for custom callback - if (jsonClass.modelFitArgs.callbacks) { - let jsonCallback = tf.callbacks.earlyStopping() - let modelFitArgs = jsonClass.modelFitArgs - jsonCallback = Object.assign(jsonCallback, modelFitArgs.callbacks[0]) - modelFitArgs.callbacks = [jsonCallback] - } - - if (jsonClass.denseLayerArgs.kernelInitializer) { - let initializerName = jsonClass.denseLayerArgs.kernelInitializer - jsonClass.denseLayerArgs.kernelInitializer = initializer(initializerName) - } - if (jsonClass.denseLayerArgs.biasInitializer) { - let biasName = jsonClass.denseLayerArgs.biasInitializer - jsonClass.denseLayerArgs.biasInitializer = initializer(biasName) - } - - if (jsonClass.oneHot) { - let jsonOneHotEncoder = new OneHotEncoder() - jsonClass.oneHot = Object.assign(jsonOneHotEncoder, jsonClass.oneHot) - } - return Object.assign(classConstructor, jsonClass) -} diff --git a/src/mixins.ts b/src/mixins.ts index 38d9de8e..20085282 100644 --- a/src/mixins.ts +++ b/src/mixins.ts @@ -1,6 +1,5 @@ import { Scikit2D, Scikit1D } from './types' import { r2Score, accuracyScore } from './metrics/metrics' -// import Serialize from './serialize' import { Serialize } from './simpleSerializer' import { tf } from './shared/globals' export class TransformerMixin extends Serialize { diff --git a/src/naive_bayes/BaseNaiveBayes.ts b/src/naive_bayes/BaseNaiveBayes.ts index 563c8fa3..fd48b42b 100644 --- a/src/naive_bayes/BaseNaiveBayes.ts +++ b/src/naive_bayes/BaseNaiveBayes.ts @@ -16,7 +16,7 @@ import { polyfillUnique } from '../tfUtils' import { tf } from '../shared/globals' import { Scikit1D, Scikit2D } from '../types' import { convertToNumericTensor2D, convertToTensor1D } from '../utils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export interface NaiveBayesParams { /** @@ -152,37 +152,4 @@ export abstract class BaseNaiveBayes extends Serialize { mean: tf.Tensor1D, variance: tf.Tensor1D ): tf.Tensor1D - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (this.priors) { - jsonClass.priors = this.priors.arraySync() - } - jsonClass.classes = this.classes.arraySync() - jsonClass.means = this.means.map((t: tf.Tensor1D) => t.arraySync()) - jsonClass.variances = this.variances.map((v: tf.Tensor1D) => v.arraySync()) - return JSON.stringify(jsonClass) - } - - public fromJson(model: string) { - const jsonModel = JSON.parse(model) - - if (jsonModel.priors) { - jsonModel.priors = tf.tensor(jsonModel.priors) - } - jsonModel.classes = tf.tensor(jsonModel.classes) - - const means = [] - for (const wMeans of jsonModel.means) { - means.push(tf.tensor(wMeans)) - } - const variances = [] - for (const variance of jsonModel.variances) { - variances.push(tf.tensor(variance)) - } - jsonModel.means = means - jsonModel.variances = variances - return Object.assign(this, jsonModel) as this - } } diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index abce7a22..093f2151 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -12,8 +12,7 @@ * limitations under the License. * ========================================================================== */ -import { GaussianNB } from './GaussianNB' -import { toObject, fromObject } from '../simpleSerializer' +import { GaussianNB, fromObject } from '../index' describe('GaussianNB', function () { it('without priors', async () => { @@ -100,9 +99,9 @@ describe('GaussianNB', function () { const model = new GaussianNB({ priors: [0.5, 0.5], varSmoothing: 1.0 }) await model.fit(X, y) - const labels = model.predict(X) + model.predict(X) - const serializeModel = await toObject(model) + const serializeModel = await model.toObject() const newModel = await fromObject(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) diff --git a/src/neighbors/KNeighborsBase.ts b/src/neighbors/KNeighborsBase.ts index 3b4dd715..31774e18 100644 --- a/src/neighbors/KNeighborsBase.ts +++ b/src/neighbors/KNeighborsBase.ts @@ -21,7 +21,7 @@ import { convertToNumericTensor1D, convertToNumericTensor2D } from '../utils' import { assert } from '../typesUtils' import { tf } from '../shared/globals' import { KdTree } from './KdTree' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' const WEIGHTS_FUNCTIONS = { uniform(distances: tf.Tensor2D) { diff --git a/src/neighbors/KNeighborsRegressor.test.ts b/src/neighbors/KNeighborsRegressor.test.ts index 50c5c418..751fb8fb 100644 --- a/src/neighbors/KNeighborsRegressor.test.ts +++ b/src/neighbors/KNeighborsRegressor.test.ts @@ -13,12 +13,10 @@ * ========================================================================== */ -import { KNeighborsRegressor } from './KNeighborsRegressor' +import { KNeighborsRegressor, crossValScore, KFold } from '../index' import { KNeighborsParams } from './KNeighborsBase' import { dataUrls } from '../datasets/datasets' import { arrayEqual } from '../utils' -import { crossValScore } from '../model_selection/crossValScore' -import { KFold } from '../model_selection/KFold' import { negMeanSquaredError } from '../model_selection/scorers' import '../jestTensorMatchers' import * as dfd from 'danfojs-node' diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 7c10306b..18e77d1c 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -1,10 +1,13 @@ -import { Pipeline, makePipeline } from './Pipeline' +import { + Pipeline, + makePipeline, + LinearRegression, + SimpleImputer, + MinMaxScaler, + fromObject +} from '../index' import { tf } from '../shared/globals' import { tensorEqual } from '../utils' -import { LinearRegression } from '../linear_model/LinearRegression' -import { SimpleImputer } from '../impute/SimpleImputer' -import { MinMaxScaler } from '../preprocessing/MinMaxScaler' -import { toObject, fromObject } from '../simpleSerializer' describe('Pipeline', function () { it('Use a Pipeline (min-max scaler, and linear regression)', async function () { @@ -97,8 +100,7 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = await toObject(pipeline) - console.log(saveModel) + const saveModel = await pipeline.toObject() const newPipeLine = await fromObject(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) diff --git a/src/pipeline/Pipeline.ts b/src/pipeline/Pipeline.ts index 6cf5e6c4..a2c5e923 100644 --- a/src/pipeline/Pipeline.ts +++ b/src/pipeline/Pipeline.ts @@ -1,8 +1,7 @@ /* eslint-disable @typescript-eslint/no-explicit-any */ import { assert } from '../typesUtils' import { Scikit1D, Scikit2D } from '../types' -import Serialize from '../serialize' -import { toJson, fromJson } from '../ensemble/serializeEnsemble' +import { Serialize } from '../simpleSerializer' import { tf } from '../shared/globals' /* @@ -206,15 +205,6 @@ export class Pipeline extends Serialize { let XT = this.fitTransformExceptLast(X) return await lastEstimator.fitPredict(XT, y) } - - public async toJson(): Promise { - const classJson = JSON.parse(super.toJson() as string) - return toJson(this, classJson) - } - - public fromJson(model: string) { - return fromJson(this, model) as this - } } /** diff --git a/src/preprocessing/LabelEncoder.test.ts b/src/preprocessing/LabelEncoder.test.ts index 062929c8..b56ede1a 100644 --- a/src/preprocessing/LabelEncoder.test.ts +++ b/src/preprocessing/LabelEncoder.test.ts @@ -1,4 +1,4 @@ -import { LabelEncoder } from './LabelEncoder' +import { LabelEncoder } from '../index' import * as dfd from 'danfojs-node' describe('LabelEncoder', function () { diff --git a/src/preprocessing/LabelEncoder.ts b/src/preprocessing/LabelEncoder.ts index 9067e707..6173f31b 100644 --- a/src/preprocessing/LabelEncoder.ts +++ b/src/preprocessing/LabelEncoder.ts @@ -16,7 +16,7 @@ import { Scikit1D } from '../types' import { tf } from '../shared/globals' import { isSeriesInterface } from '../typesUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' /* Next steps: diff --git a/src/preprocessing/MaxAbsScaler.test.ts b/src/preprocessing/MaxAbsScaler.test.ts index b69f584a..91781bab 100644 --- a/src/preprocessing/MaxAbsScaler.test.ts +++ b/src/preprocessing/MaxAbsScaler.test.ts @@ -1,4 +1,4 @@ -import { MaxAbsScaler } from './MaxAbsScaler' +import { MaxAbsScaler, fromObject } from '../index' import * as dfd from 'danfojs-node' import { tf } from '../shared/globals' import { arrayEqual } from '../utils' @@ -135,6 +135,20 @@ describe('MaxAbsScaler', function () { expect(arrayEqual(X_trans_new, X_expected_new, 0.01)).toBe(true) }) + it('Serialize and unserialize MaxAbsScaler', async function () { + const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) + const scaler = new MaxAbsScaler() + scaler.fit(data) + const serial = await scaler.toObject() + const newModel = await fromObject(serial) + expect(newModel.transform(data).arraySync().flat()).toEqual([ + 1, + 1, + NaN, + 0.75, + 0.75 + ]) + }) /* Streaming test def test_maxabs_scaler_partial_fit(): # Test if partial_fit run over many batches of size 1 and 50 diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index 6e995b47..b0bf1420 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -1,9 +1,8 @@ -import { MinMaxScaler } from './MinMaxScaler' +import { MinMaxScaler, fromObject } from '../index' import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' import { tf } from '../shared/globals' -import { toObject, fromObject } from '../simpleSerializer' export function convertTensorToInputType( tensor: tf.Tensor, inputData: ScikitVecOrMatrix @@ -165,7 +164,7 @@ describe('MinMaxscaler', function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = (await toObject(scaler)) as string + const serial = await scaler.toObject() const newModel = await fromObject(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, diff --git a/src/preprocessing/Normalizer.test.ts b/src/preprocessing/Normalizer.test.ts index 65d20877..b202c377 100644 --- a/src/preprocessing/Normalizer.test.ts +++ b/src/preprocessing/Normalizer.test.ts @@ -1,4 +1,4 @@ -import { Normalizer } from './Normalizer' +import { Normalizer } from '../index' import * as dfd from 'danfojs-node' import { arrayEqual } from '../utils' diff --git a/src/preprocessing/OneHotEncoder.test.ts b/src/preprocessing/OneHotEncoder.test.ts index 35100413..2ddd7fce 100644 --- a/src/preprocessing/OneHotEncoder.test.ts +++ b/src/preprocessing/OneHotEncoder.test.ts @@ -1,5 +1,5 @@ import { tf } from '../shared/globals' -import { OneHotEncoder } from './OneHotEncoder' +import { OneHotEncoder } from '../index' import { arrayTo2DColumn } from '../utils' describe('OneHotEncoder', function () { diff --git a/src/preprocessing/OrdinalEncoder.test.ts b/src/preprocessing/OrdinalEncoder.test.ts index 57666262..53438b53 100644 --- a/src/preprocessing/OrdinalEncoder.test.ts +++ b/src/preprocessing/OrdinalEncoder.test.ts @@ -1,4 +1,4 @@ -import { OrdinalEncoder } from './OrdinalEncoder' +import { OrdinalEncoder } from '../index' import { arrayTo2DColumn } from '../utils' describe('OrdinalEncoder', function () { diff --git a/src/preprocessing/RobustScaler.test.ts b/src/preprocessing/RobustScaler.test.ts index 3a31475e..3653c9d4 100644 --- a/src/preprocessing/RobustScaler.test.ts +++ b/src/preprocessing/RobustScaler.test.ts @@ -1,4 +1,4 @@ -import { RobustScaler } from './RobustScaler' +import { RobustScaler } from '../index' import * as dfd from 'danfojs-node' import { arrayEqual } from '../utils' diff --git a/src/preprocessing/StandardScaler.test.ts b/src/preprocessing/StandardScaler.test.ts index e3830ef7..d80621d6 100644 --- a/src/preprocessing/StandardScaler.test.ts +++ b/src/preprocessing/StandardScaler.test.ts @@ -1,4 +1,4 @@ -import { StandardScaler } from './StandardScaler' +import { StandardScaler } from '../index' import * as dfd from 'danfojs-node' describe('StandardScaler', function () { diff --git a/src/serialize.ts b/src/serialize.ts deleted file mode 100644 index 2ef36c94..00000000 --- a/src/serialize.ts +++ /dev/null @@ -1,50 +0,0 @@ -/** - * A Generic class to serialized and Unserialized classes (models, transformers, - * or any operator) - */ - -import { tf } from './shared/globals' -export default class Serialize { - public name = 'Serialize' // default name for all inherited class - - /** - * Serialize all [inherited] class property into - * a json string - * @returns Json string - */ - public toJson(): string | Promise { - const thisCopy: any = Object.assign({}, this) - for (const key of Object.keys(thisCopy)) { - let value = thisCopy[key] - if (value instanceof tf.Tensor) { - thisCopy[key] = { - type: 'Tensor', - value: value.arraySync() - } - } - } - return JSON.stringify(thisCopy) - } - - /** - * Initialize [inherited] class from serialized - * json string - * @param model string - * @returns [Inherited] Class - */ - public fromJson(model: string) { - let jsonClass = JSON.parse(model) - if (jsonClass.name != this.name) { - throw new Error(`wrong json values for ${this.name} constructor`) - } - - for (let key of Object.keys(jsonClass)) { - let value = jsonClass[key] - if (typeof value === 'object' && value?.type === 'Tensor') { - jsonClass[key] = tf.tensor(jsonClass[key].value) - } - } - - return Object.assign(this, jsonClass) as this - } -} diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts index ca8ee217..bcd8d714 100644 --- a/src/simpleSerializer.ts +++ b/src/simpleSerializer.ts @@ -91,7 +91,9 @@ export async function toObjectInner( } // Array case if (Array.isArray(val)) { - return await Promise.all(val.map(async (el) => await toObjectInner(el))) + return await Promise.all( + val.map(async (el) => await toObjectInner(el, ignoreKeys)) + ) } // Serialize a Tensor @@ -128,12 +130,6 @@ export async function toObjectInner( } } - if (EstimatorList.includes(val.name)) { - if (val.toObject) { - return val.toObject() - } - } - // Generic object case / class case let response: any = {} for (let key of Object.keys(val)) { @@ -145,7 +141,7 @@ export async function toObjectInner( // if (typeof val[key] === 'function') { // continue // } - response[key] = await toObjectInner(val[key]) + response[key] = await toObjectInner(val[key], ignoreKeys) } return response } @@ -212,10 +208,16 @@ export async function fromObject(val: any): Promise { } } +let ignoredKeysForSGDRegressor = [ + 'modelCompileArgs', + 'modelFitArgs', + 'denseLayerArgs' +] + export class Serialize { - async toObject(ignoreKeys: string[] = []): Promise { + async toObject(): Promise { try { - return await toObjectInner(this, ignoreKeys) + return await toObjectInner(this, ignoredKeysForSGDRegressor) } catch (e) { console.error(e) } diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index e2ed5a32..f9ab852c 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' -import { toObject, fromObject } from '../simpleSerializer' +import { fromObject } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -83,7 +83,7 @@ describe('Criterion', function () { y }) criterion.init(0, 6, sampleMap) - const serial = await toObject(criterion) + const serial = await criterion.toObject() const newCriterion = await fromObject(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) diff --git a/src/tree/Criterion.ts b/src/tree/Criterion.ts index 3f57c2de..ef846390 100644 --- a/src/tree/Criterion.ts +++ b/src/tree/Criterion.ts @@ -1,6 +1,5 @@ -import { assert } from '../typesUtils' import { int } from '../randUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export type ImpurityMeasure = 'gini' | 'entropy' | 'squared_error' diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index d2fc7728..c5933bd1 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,7 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' -import { toObject, fromObject } from '../simpleSerializer' +import { fromObject } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -621,7 +621,7 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = await toObject(tree_classifier) + const serial = await tree_classifier.toObject() const newTree = await fromObject(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) diff --git a/src/tree/DecisionTree.ts b/src/tree/DecisionTree.ts index a4659451..8e654f6a 100644 --- a/src/tree/DecisionTree.ts +++ b/src/tree/DecisionTree.ts @@ -8,7 +8,7 @@ import { validateX, validateY } from './utils' import { Scikit1D, Scikit2D } from '../types' import { convertScikit2DToArray, convertScikit1DToArray } from '../utils' import { LabelEncoder } from '../preprocessing/LabelEncoder' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' /* Next steps: @@ -155,6 +155,7 @@ export class DecisionTreeBase extends Serialize { X: number[][] = [] y: number[] = [] labelEncoder?: LabelEncoder + name: string constructor({ criterion = 'gini', @@ -300,37 +301,6 @@ export class DecisionTreeBase extends Serialize { this.tree.populateChildIds() this.tree.isBuilt = true } - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (this.splitter) { - jsonClass.splitter = this.splitter.toJson() as string - } - if (this.labelEncoder) { - jsonClass.labelEncoder = this.labelEncoder.toJson() - } - return JSON.stringify(jsonClass) - } - - public fromJson(model: string) { - const jsonClass = JSON.parse(model) - - if (jsonClass.tree) { - const tree = new DecisionTree() - jsonClass.tree = Object.assign(tree, jsonClass.tree) - } - - if (jsonClass.splitter) { - jsonClass.splitter = Splitter.fromJson(jsonClass.splitter) - } - if (jsonClass.labelEncoder) { - jsonClass.labelEncoder = new LabelEncoder().fromJson( - jsonClass.labelEncoder - ) - } - return Object.assign(this, jsonClass) as this - } } export interface DecisionTreeClassifierParams { diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index a8b164f1..35481658 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,6 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' -import { toObject, fromObject } from '../simpleSerializer' +import { fromObject } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -203,7 +203,7 @@ describe('Splitter', function () { samplesSubset: [] }) splitter.splitNode() - const serial = await toObject(splitter) + const serial = await splitter.toObject() const newSplitter = await fromObject(serial) const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) diff --git a/src/tree/Splitter.ts b/src/tree/Splitter.ts index c53750a9..8a9c9be8 100644 --- a/src/tree/Splitter.ts +++ b/src/tree/Splitter.ts @@ -5,7 +5,7 @@ import { } from './Criterion' import shuffle from 'lodash/shuffle' import { int } from '../randUtils' -import Serialize from '../serialize' +import { Serialize } from '../simpleSerializer' export interface Split { feature: int @@ -215,44 +215,4 @@ export class Splitter extends Serialize { return currentSplit } } - - public toJson(): string { - const jsonClass = JSON.parse(super.toJson() as string) - - if (jsonClass.criterion) { - jsonClass.criterion = this.criterion.toJson() as string - } - if (this.sampleMap) jsonClass.sampleMap = Array.from(this.sampleMap) - return JSON.stringify(jsonClass) - } - - static fromJson(model: string) { - const jsonClass = JSON.parse(model) - - if (jsonClass.criterion) { - const criterionName = JSON.parse(jsonClass.criterion).name - if (criterionName == 'classificationCriterion') { - jsonClass.criterion = ClassificationCriterion.fromJson( - jsonClass.criterion - ) - } else { - jsonClass.criterion = RegressionCriterion.fromJson(jsonClass.criterion) - } - } - - if (jsonClass.sampleMap) { - jsonClass.sampleMap = new Int32Array(jsonClass.sampleMap) - } - - const splitter = new Splitter({ - X: jsonClass.X, - y: jsonClass.y, - minSamplesLeaf: jsonClass.minSamplesLeaf, - impurityMeasure: 'squared_error', - maxFeatures: jsonClass.maxFeatures, - samplesSubset: jsonClass.samplesSubset - }) - - return Object.assign(splitter, jsonClass) as Splitter - } } From 260c1347d7c06c494b24b813c00c11d68c4da354 Mon Sep 17 00:00:00 2001 From: Dan Crescimanno Date: Sun, 8 May 2022 09:17:24 -0700 Subject: [PATCH 3/4] feat: updated test --- docs/convert.js | 1 - package-lock.json | 14 ++++++++++++++ package.json | 1 + src/cluster/KMeans.test.ts | 6 +++--- src/compose/ColumnTransformer.test.ts | 6 +++--- src/dummy/DummyClassifier.test.ts | 6 +++--- src/dummy/DummyRegressor.test.ts | 6 +++--- src/ensemble/VotingClassifier.test.ts | 6 +++--- src/ensemble/VotingRegressor.test.ts | 6 +++--- src/impute/SimpleImputer.test.ts | 6 +++--- src/index.ts | 2 +- src/linear_model/LinearRegression.test.ts | 6 +++--- src/linear_model/LogisticRegression.test.ts | 6 +++--- src/model_selection/KFold.test.ts | 2 +- src/naive_bayes/GaussianNB.test.ts | 6 +++--- src/neighbors/KNeighborsClassifier.test.ts | 2 +- src/pipeline/Pipeline.test.ts | 6 +++--- src/preprocessing/MaxAbsScaler.test.ts | 6 +++--- src/preprocessing/MinMaxScaler.test.ts | 6 +++--- src/simpleSerializer.ts | 13 +++++++++++-- src/tree/Criterion.test.ts | 6 +++--- src/tree/DecisionTree.test.ts | 6 +++--- src/tree/Splitter.test.ts | 6 +++--- 23 files changed, 77 insertions(+), 54 deletions(-) diff --git a/docs/convert.js b/docs/convert.js index ef76d67e..ddd5c88d 100644 --- a/docs/convert.js +++ b/docs/convert.js @@ -189,7 +189,6 @@ function getTypeName(val, bigObj) { } function generateProperties(jsonClass, bigObj) { - // console.log(jsonClass.children) let interface = getInterfaceForClass(jsonClass, bigObj) let allConstructorArgs = [] if (interface && interface.children) { diff --git a/package-lock.json b/package-lock.json index c40cd259..131322a0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,6 +12,7 @@ "dependencies": { "@tensorflow/tfjs": "^3.16.0", "@tensorflow/tfjs-node": "^3.16.0", + "base64-arraybuffer": "^1.0.2", "lodash": "^4.17.21", "mathjs": "^10.0.0", "simple-statistics": "^7.7.0" @@ -4757,6 +4758,14 @@ "version": "1.0.2", "license": "MIT" }, + "node_modules/base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==", + "engines": { + "node": ">= 0.6.0" + } + }, "node_modules/base64id": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", @@ -19938,6 +19947,11 @@ "balanced-match": { "version": "1.0.2" }, + "base64-arraybuffer": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz", + "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==" + }, "base64id": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/base64id/-/base64id-2.0.0.tgz", diff --git a/package.json b/package.json index 2360944b..071963eb 100644 --- a/package.json +++ b/package.json @@ -51,6 +51,7 @@ "dependencies": { "@tensorflow/tfjs": "^3.16.0", "@tensorflow/tfjs-node": "^3.16.0", + "base64-arraybuffer": "^1.0.2", "lodash": "^4.17.21", "mathjs": "^10.0.0", "simple-statistics": "^7.7.0" diff --git a/src/cluster/KMeans.test.ts b/src/cluster/KMeans.test.ts index ac737fc5..b5f3ce2a 100644 --- a/src/cluster/KMeans.test.ts +++ b/src/cluster/KMeans.test.ts @@ -1,4 +1,4 @@ -import { fromObject, KMeans } from '../index' +import { fromJSON, KMeans } from '../index' // Next steps: Improve on kmeans cluster testing describe('KMeans', () => { const X = [ @@ -68,8 +68,8 @@ describe('KMeans', () => { ] const kmean = new KMeans({ nClusters: 2, randomState: 0 }) kmean.fit(X) - const ksave = await kmean.toObject() - const ksaveModel = await fromObject(ksave) + const ksave = await kmean.toJSON() + const ksaveModel = await fromJSON(ksave) expect(centroids).toEqual(ksaveModel.clusterCenters.arraySync()) }) diff --git a/src/compose/ColumnTransformer.test.ts b/src/compose/ColumnTransformer.test.ts index 9dd79e1b..b31a918e 100644 --- a/src/compose/ColumnTransformer.test.ts +++ b/src/compose/ColumnTransformer.test.ts @@ -1,5 +1,5 @@ import { - fromObject, + fromJSON, SimpleImputer, MinMaxScaler, ColumnTransformer @@ -50,8 +50,8 @@ describe('ColumnTransformer', function () { }) transformer.fitTransform(newDf) - let obj = await transformer.toObject() - let myResult = await fromObject(obj) + let obj = await transformer.toJSON() + let myResult = await fromJSON(obj) expect(myResult.transformers.length).toEqual(2) }) diff --git a/src/dummy/DummyClassifier.test.ts b/src/dummy/DummyClassifier.test.ts index 9905da94..b1f0e920 100644 --- a/src/dummy/DummyClassifier.test.ts +++ b/src/dummy/DummyClassifier.test.ts @@ -1,4 +1,4 @@ -import { DummyClassifier, fromObject } from '../index' +import { DummyClassifier, fromJSON } from '../index' describe('DummyClassifier', function () { it('Use DummyClassifier on simple example (mostFrequent)', function () { const clf = new DummyClassifier() @@ -84,8 +84,8 @@ describe('DummyClassifier', function () { const y = [10, 20, 20, 30] clf.fit(X, y) - const clfSave = await clf.toObject() - const newClf = await fromObject(clfSave) + const clfSave = await clf.toJSON() + const newClf = await fromJSON(clfSave) expect(clf).toEqual(newClf) }) }) diff --git a/src/dummy/DummyRegressor.test.ts b/src/dummy/DummyRegressor.test.ts index 6cc01a50..6c79feb7 100644 --- a/src/dummy/DummyRegressor.test.ts +++ b/src/dummy/DummyRegressor.test.ts @@ -1,4 +1,4 @@ -import { DummyRegressor, fromObject } from '../index' +import { DummyRegressor, fromJSON } from '../index' describe('DummyRegressor', function () { it('Use DummyRegressor on simple example (mean)', function () { @@ -93,8 +93,8 @@ describe('DummyRegressor', function () { ] reg.fit(X, y) - const saveReg = await reg.toObject() - const newReg = await fromObject(saveReg) + const saveReg = await reg.toJSON() + const newReg = await fromJSON(saveReg) expect(newReg.predict(predictX).arraySync()).toEqual([10, 10, 10]) }) diff --git a/src/ensemble/VotingClassifier.test.ts b/src/ensemble/VotingClassifier.test.ts index 60f43da7..cafb9973 100644 --- a/src/ensemble/VotingClassifier.test.ts +++ b/src/ensemble/VotingClassifier.test.ts @@ -3,7 +3,7 @@ import { VotingClassifier, DummyClassifier, LogisticRegression, - fromObject + fromJSON } from '../index' describe('VotingClassifier', function () { @@ -121,8 +121,8 @@ describe('VotingClassifier', function () { await voter.fit(X, y) - const savedModel = await voter.toObject() - const newModel = await fromObject(savedModel) + const savedModel = await voter.toJSON() + const newModel = await fromJSON(savedModel) expect(newModel.predict(X).arraySync()).toEqual([1, 1, 1, 1, 1]) }, 30000) diff --git a/src/ensemble/VotingRegressor.test.ts b/src/ensemble/VotingRegressor.test.ts index e0060196..7782ab86 100644 --- a/src/ensemble/VotingRegressor.test.ts +++ b/src/ensemble/VotingRegressor.test.ts @@ -1,7 +1,7 @@ import { makeVotingRegressor, VotingRegressor, - fromObject, + fromJSON, DummyRegressor, LinearRegression } from '../index' @@ -55,8 +55,8 @@ describe('VotingRegressor', function () { await voter.fit(X, y) - const savedModel = await voter.toObject() - const newModel = await fromObject(savedModel) + const savedModel = await voter.toJSON() + const newModel = await fromJSON(savedModel) expect(newModel.score(X, y)).toEqual(voter.score(X, y)) }, 30000) }) diff --git a/src/impute/SimpleImputer.test.ts b/src/impute/SimpleImputer.test.ts index 2bad5c7b..d9957b5e 100644 --- a/src/impute/SimpleImputer.test.ts +++ b/src/impute/SimpleImputer.test.ts @@ -1,5 +1,5 @@ import { tf } from '../shared/globals' -import { SimpleImputer, fromObject } from '../index' +import { SimpleImputer, fromJSON } from '../index' describe('SimpleImputer', function () { it('Imputes with "constant" strategy 2D one column. In this strategy, we give the fill value', function () { @@ -163,8 +163,8 @@ describe('SimpleImputer', function () { ] imputer.fitTransform(data) - const thing = await imputer.toObject() - const newImputer = await fromObject(thing) + const thing = await imputer.toJSON() + const newImputer = await fromJSON(thing) const newReturned = newImputer.transform(data) expect(newReturned.arraySync()).toEqual(expected) expect(newImputer.transform([[NaN, NaN]]).arraySync()).toEqual([[4, 3]]) diff --git a/src/index.ts b/src/index.ts index 532f8be3..95c1e491 100644 --- a/src/index.ts +++ b/src/index.ts @@ -87,7 +87,7 @@ export { export { KFold } from './model_selection/KFold' export { trainTestSplit } from './model_selection/trainTestSplit' export { crossValScore } from './model_selection/crossValScore' -export { fromObject, Serialize } from './simpleSerializer' +export { fromObject, fromJSON, Serialize } from './simpleSerializer' export { ClassificationCriterion, RegressionCriterion } from './tree/Criterion' export { Splitter } from './tree/Splitter' diff --git a/src/linear_model/LinearRegression.test.ts b/src/linear_model/LinearRegression.test.ts index d4ca20a6..1a85e235 100644 --- a/src/linear_model/LinearRegression.test.ts +++ b/src/linear_model/LinearRegression.test.ts @@ -1,4 +1,4 @@ -import { LinearRegression, fromObject } from '../index' +import { LinearRegression, fromJSON } from '../index' import { tensorEqual } from '../utils' import { tf } from '../shared/globals' function roughlyEqual(a: number, b: number, tol = 0.1) { @@ -147,8 +147,8 @@ describe('LinearRegression', function () { const lr = new LinearRegression({ fitIntercept: false }) await lr.fit(mediumX, yPlusJitter) - const serialized = await lr.toObject() - const newModel = await fromObject(serialized) + const serialized = await lr.toJSON() + const newModel = await fromJSON(serialized) expect(tensorEqual(newModel.coef, tf.tensor1d([2.5, 1]), 0.1)).toBe(true) expect(roughlyEqual(newModel.intercept as number, 0)).toBe(true) diff --git a/src/linear_model/LogisticRegression.test.ts b/src/linear_model/LogisticRegression.test.ts index 03243db3..be3fe8e1 100644 --- a/src/linear_model/LogisticRegression.test.ts +++ b/src/linear_model/LogisticRegression.test.ts @@ -1,4 +1,4 @@ -import { LogisticRegression, fromObject } from '../index' +import { LogisticRegression, fromJSON } from '../index' import { tf } from '../shared/globals' describe('LogisticRegression', function () { it('Works on arrays (small example)', async function () { @@ -132,8 +132,8 @@ describe('LogisticRegression', function () { let logreg = new LogisticRegression({ penalty: 'l2' }) await logreg.fit(X, y) - const serializeModel = await logreg.toObject() - const newModel = await fromObject(serializeModel) + const serializeModel = await logreg.toJSON() + const newModel = await fromJSON(serializeModel) const newModelResult = newModel.predict(Xtest) expect(newModelResult.arraySync()).toEqual([0, 0, 0, 0, 0, 0, 2, 2, 2]) diff --git a/src/model_selection/KFold.test.ts b/src/model_selection/KFold.test.ts index 625e0c0d..3b0678fb 100644 --- a/src/model_selection/KFold.test.ts +++ b/src/model_selection/KFold.test.ts @@ -14,7 +14,7 @@ */ import * as fc from 'fast-check' -import { KFold } from './KFold' +import { KFold } from '../index' import { alea } from '../randUtils' import '../jestTensorMatchers' import { tf } from '../shared/globals' diff --git a/src/naive_bayes/GaussianNB.test.ts b/src/naive_bayes/GaussianNB.test.ts index 093f2151..5cdfc9bb 100644 --- a/src/naive_bayes/GaussianNB.test.ts +++ b/src/naive_bayes/GaussianNB.test.ts @@ -12,7 +12,7 @@ * limitations under the License. * ========================================================================== */ -import { GaussianNB, fromObject } from '../index' +import { GaussianNB, fromJSON } from '../index' describe('GaussianNB', function () { it('without priors', async () => { @@ -101,8 +101,8 @@ describe('GaussianNB', function () { await model.fit(X, y) model.predict(X) - const serializeModel = await model.toObject() - const newModel = await fromObject(serializeModel) + const serializeModel = await model.toJSON() + const newModel = await fromJSON(serializeModel) expect(newModel.predict(X).arraySync()).toEqual([0, 0, 1, 1, 1]) }) }) diff --git a/src/neighbors/KNeighborsClassifier.test.ts b/src/neighbors/KNeighborsClassifier.test.ts index 559a72df..b69464fc 100644 --- a/src/neighbors/KNeighborsClassifier.test.ts +++ b/src/neighbors/KNeighborsClassifier.test.ts @@ -13,7 +13,7 @@ * ========================================================================== */ -import { KNeighborsClassifier } from './KNeighborsClassifier' +import { KNeighborsClassifier } from '../index' import { KNeighborsParams } from './KNeighborsBase' import { dataUrls } from '../datasets/datasets' import { crossValScore } from '../model_selection/crossValScore' diff --git a/src/pipeline/Pipeline.test.ts b/src/pipeline/Pipeline.test.ts index 18e77d1c..8a9ac2ec 100644 --- a/src/pipeline/Pipeline.test.ts +++ b/src/pipeline/Pipeline.test.ts @@ -4,7 +4,7 @@ import { LinearRegression, SimpleImputer, MinMaxScaler, - fromObject + fromJSON } from '../index' import { tf } from '../shared/globals' import { tensorEqual } from '../utils' @@ -100,8 +100,8 @@ describe('Pipeline', function () { await pipeline.fit(X, y) - const saveModel = await pipeline.toObject() - const newPipeLine = await fromObject(saveModel) + const saveModel = await pipeline.toJSON() + const newPipeLine = await fromJSON(saveModel) expect(newPipeLine.steps[1][1].min.arraySync()).toEqual([0, 0]) expect( diff --git a/src/preprocessing/MaxAbsScaler.test.ts b/src/preprocessing/MaxAbsScaler.test.ts index 91781bab..f742bdcc 100644 --- a/src/preprocessing/MaxAbsScaler.test.ts +++ b/src/preprocessing/MaxAbsScaler.test.ts @@ -1,4 +1,4 @@ -import { MaxAbsScaler, fromObject } from '../index' +import { MaxAbsScaler, fromJSON } from '../index' import * as dfd from 'danfojs-node' import { tf } from '../shared/globals' import { arrayEqual } from '../utils' @@ -139,8 +139,8 @@ describe('MaxAbsScaler', function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MaxAbsScaler() scaler.fit(data) - const serial = await scaler.toObject() - const newModel = await fromObject(serial) + const serial = await scaler.toJSON() + const newModel = await fromJSON(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/preprocessing/MinMaxScaler.test.ts b/src/preprocessing/MinMaxScaler.test.ts index b0bf1420..a4efd446 100644 --- a/src/preprocessing/MinMaxScaler.test.ts +++ b/src/preprocessing/MinMaxScaler.test.ts @@ -1,4 +1,4 @@ -import { MinMaxScaler, fromObject } from '../index' +import { MinMaxScaler, fromJSON } from '../index' import * as dfd from 'danfojs-node' import { isDataFrameInterface, isSeriesInterface } from '../typesUtils' import { ScikitVecOrMatrix } from '../types' @@ -164,8 +164,8 @@ describe('MinMaxscaler', function () { const data = tf.tensor2d([4, 4, 'whoops', 3, 3] as any, [5, 1]) const scaler = new MinMaxScaler() scaler.fit(data) - const serial = await scaler.toObject() - const newModel = await fromObject(serial) + const serial = await scaler.toJSON() + const newModel = await fromJSON(serial) expect(newModel.transform(data).arraySync().flat()).toEqual([ 1, 1, diff --git a/src/simpleSerializer.ts b/src/simpleSerializer.ts index bcd8d714..6b54b764 100644 --- a/src/simpleSerializer.ts +++ b/src/simpleSerializer.ts @@ -1,5 +1,5 @@ import { tf } from './shared/globals' - +import { encode, decode } from 'base64-arraybuffer' const EstimatorList = [ 'KNeighborsRegressor', 'LinearRegression', @@ -48,6 +48,7 @@ class JSONHandler { async save(artifacts: any) { // Base 64 encoding + artifacts.weightData = encode(artifacts.weightData) this.savedArtifacts = artifacts return { modelArtifactsInfo: { @@ -62,6 +63,7 @@ class JSONHandler { async load() { // Base64 decode + this.savedArtifacts.weightData = decode(this.savedArtifacts.weightData) return this.savedArtifacts } } @@ -70,7 +72,6 @@ export async function toObjectInner( val: any, ignoreKeys: string[] = [] ): Promise { - // console.log(val) if (['number', 'string', 'undefined', 'boolean'].includes(typeof val)) { return val } @@ -208,6 +209,10 @@ export async function fromObject(val: any): Promise { } } +export async function fromJSON(val: string): Promise { + return await fromObject(JSON.parse(val)) +} + let ignoredKeysForSGDRegressor = [ 'modelCompileArgs', 'modelFitArgs', @@ -222,4 +227,8 @@ export class Serialize { console.error(e) } } + + async toJSON(): Promise { + return JSON.stringify(await this.toObject()) + } } diff --git a/src/tree/Criterion.test.ts b/src/tree/Criterion.test.ts index f9ab852c..76e88960 100644 --- a/src/tree/Criterion.test.ts +++ b/src/tree/Criterion.test.ts @@ -1,5 +1,5 @@ import { ClassificationCriterion, giniCoefficient, entropy } from './Criterion' -import { fromObject } from '../simpleSerializer' +import { fromJSON } from '../simpleSerializer' describe('Criterion', function () { let X = [ [-2, -1], @@ -83,8 +83,8 @@ describe('Criterion', function () { y }) criterion.init(0, 6, sampleMap) - const serial = await criterion.toObject() - const newCriterion = await fromObject(serial) + const serial = await criterion.toJSON() + const newCriterion = await fromJSON(serial) expect(newCriterion.nodeImpurity()).toEqual(1) }, 1000) }) diff --git a/src/tree/DecisionTree.test.ts b/src/tree/DecisionTree.test.ts index c5933bd1..71652fac 100644 --- a/src/tree/DecisionTree.test.ts +++ b/src/tree/DecisionTree.test.ts @@ -1,7 +1,7 @@ import { DecisionTreeClassifier, DecisionTreeRegressor } from './DecisionTree' import { dataUrls } from '../datasets/datasets' import * as dfd from 'danfojs-node' -import { fromObject } from '../simpleSerializer' +import { fromJSON } from '../simpleSerializer' describe('DecisionTree', function () { it('Use the DecisionTree (toy)', async function () { @@ -621,8 +621,8 @@ describe('DecisionTree', function () { let tree_classifier = new DecisionTreeClassifier() tree_classifier.fit(X, y) - const serial = await tree_classifier.toObject() - const newTree = await fromObject(serial) + const serial = await tree_classifier.toJSON() + const newTree = await fromJSON(serial) expect(newTree.predict(T)).toEqual(true_result) }, 1000) }) diff --git a/src/tree/Splitter.test.ts b/src/tree/Splitter.test.ts index 35481658..620aa37e 100644 --- a/src/tree/Splitter.test.ts +++ b/src/tree/Splitter.test.ts @@ -1,6 +1,6 @@ import { ImpurityMeasure } from './Criterion' import { Splitter } from './Splitter' -import { fromObject } from '../simpleSerializer' +import { fromJSON } from '../simpleSerializer' describe('Splitter', function () { let types = ['gini', 'entropy', 'squared_error'] @@ -203,8 +203,8 @@ describe('Splitter', function () { samplesSubset: [] }) splitter.splitNode() - const serial = await splitter.toObject() - const newSplitter = await fromObject(serial) + const serial = await splitter.toJSON() + const newSplitter = await fromJSON(serial) const newBestSplitter = newSplitter.splitNode() expect(newBestSplitter.foundSplit).toEqual(true) expect(newBestSplitter.feature).toEqual(1) From 973c3fb255875d98b334e1a9c8c53ae3aca422b1 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sun, 8 May 2022 16:34:04 +0000 Subject: [PATCH 4/4] chore(release): 1.21.0 [skip ci] # [1.21.0](https://github.com/javascriptdata/scikit.js/compare/v1.20.0...v1.21.0) (2022-05-08) ### Features * not complete serialization ([7fbea07](https://github.com/javascriptdata/scikit.js/commit/7fbea07431dbd194259e44e868df74617814f6de)) * updated serialization ([ec71323](https://github.com/javascriptdata/scikit.js/commit/ec713230efbf07ac228c0291c21f5fcc0b1bd995)) * updated test ([260c134](https://github.com/javascriptdata/scikit.js/commit/260c1347d7c06c494b24b813c00c11d68c4da354)) --- CHANGELOG.md | 9 +++++++++ package-lock.json | 4 ++-- package.json | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ea38e816..c8317c9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +# [1.21.0](https://github.com/javascriptdata/scikit.js/compare/v1.20.0...v1.21.0) (2022-05-08) + + +### Features + +* not complete serialization ([7fbea07](https://github.com/javascriptdata/scikit.js/commit/7fbea07431dbd194259e44e868df74617814f6de)) +* updated serialization ([ec71323](https://github.com/javascriptdata/scikit.js/commit/ec713230efbf07ac228c0291c21f5fcc0b1bd995)) +* updated test ([260c134](https://github.com/javascriptdata/scikit.js/commit/260c1347d7c06c494b24b813c00c11d68c4da354)) + # [1.20.0](https://github.com/javascriptdata/scikit.js/compare/v1.19.0...v1.20.0) (2022-04-26) diff --git a/package-lock.json b/package-lock.json index 131322a0..83c3e9fd 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "hasInstallScript": true, "license": "ISC", "dependencies": { diff --git a/package.json b/package.json index 071963eb..339f817f 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "scikitjs", - "version": "1.20.0", + "version": "1.21.0", "description": "Scikit-Learn for JS", "output": { "node": "dist/node/index.js", pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy