Search in sources :

Example 31 with PoissonDistribution

use of uk.ac.sussex.gdsc.smlm.math3.distribution.PoissonDistribution in project presto by prestodb.

the class TestTDigest method testPoissonDistribution.

@Test(enabled = false)
public void testPoissonDistribution() {
    int trials = 10;
    for (int k = 1; k < trials; k++) {
        TDigest tDigest = createTDigest(STANDARD_COMPRESSION_FACTOR);
        PoissonDistribution poisson = new PoissonDistribution(k * 0.1);
        List<Integer> list = new ArrayList<>();
        for (int i = 0; i < NUMBER_OF_ENTRIES; i++) {
            int sample = poisson.sample();
            tDigest.add(sample);
            list.add(sample);
        }
        assertSumInts(list, tDigest);
        Collections.sort(list);
        for (int i = 0; i < quantile.length; i++) {
            assertDiscreteWithinBound(quantile[i], STANDARD_ERROR, list, tDigest);
        }
    }
}
Also used : PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) TDigest.createTDigest(com.facebook.presto.tdigest.TDigest.createTDigest) ArrayList(java.util.ArrayList) Test(org.testng.annotations.Test)

Example 32 with PoissonDistribution

use of uk.ac.sussex.gdsc.smlm.math3.distribution.PoissonDistribution in project presto by prestodb.

the class MathFunctions method inversePoissonCdf.

@Description("Inverse of Poisson cdf given lambda (mean) parameter and probability")
@ScalarFunction
@SqlType(StandardTypes.INTEGER)
public static long inversePoissonCdf(@SqlType(StandardTypes.DOUBLE) double lambda, @SqlType(StandardTypes.DOUBLE) double p) {
    checkCondition(p >= 0 && p < 1, INVALID_FUNCTION_ARGUMENT, "p must be in the interval [0, 1)");
    checkCondition(lambda > 0, INVALID_FUNCTION_ARGUMENT, "lambda must be greater than 0");
    PoissonDistribution distribution = new PoissonDistribution(lambda);
    return distribution.inverseCumulativeProbability(p);
}
Also used : PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) DecimalOperators.modulusScalarFunction(com.facebook.presto.type.DecimalOperators.modulusScalarFunction) SqlScalarFunction(com.facebook.presto.metadata.SqlScalarFunction) ScalarFunction(com.facebook.presto.spi.function.ScalarFunction) Description(com.facebook.presto.spi.function.Description) SqlType(com.facebook.presto.spi.function.SqlType)

Example 33 with PoissonDistribution

use of uk.ac.sussex.gdsc.smlm.math3.distribution.PoissonDistribution in project shifu by ShifuML.

the class DTWorker method init.

@Override
public void init(WorkerContext<DTMasterParams, DTWorkerParams> context) {
    Properties props = context.getProps();
    try {
        SourceType sourceType = SourceType.valueOf(props.getProperty(CommonConstants.MODELSET_SOURCE_TYPE, SourceType.HDFS.toString()));
        this.modelConfig = CommonUtils.loadModelConfig(props.getProperty(CommonConstants.SHIFU_MODEL_CONFIG), sourceType);
        this.columnConfigList = CommonUtils.loadColumnConfigList(props.getProperty(CommonConstants.SHIFU_COLUMN_CONFIG), sourceType);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    this.columnCategoryIndexMapping = new HashMap<Integer, Map<String, Integer>>();
    for (ColumnConfig config : this.columnConfigList) {
        if (config.isCategorical()) {
            if (config.getBinCategory() != null) {
                Map<String, Integer> tmpMap = new HashMap<String, Integer>();
                for (int i = 0; i < config.getBinCategory().size(); i++) {
                    List<String> catVals = CommonUtils.flattenCatValGrp(config.getBinCategory().get(i));
                    for (String cval : catVals) {
                        tmpMap.put(cval, i);
                    }
                }
                this.columnCategoryIndexMapping.put(config.getColumnNum(), tmpMap);
            }
        }
    }
    this.hasCandidates = CommonUtils.hasCandidateColumns(columnConfigList);
    // create Splitter
    String delimiter = context.getProps().getProperty(Constants.SHIFU_OUTPUT_DATA_DELIMITER);
    this.splitter = MapReduceUtils.generateShifuOutputSplitter(delimiter);
    Integer kCrossValidation = this.modelConfig.getTrain().getNumKFold();
    if (kCrossValidation != null && kCrossValidation > 0) {
        isKFoldCV = true;
        LOG.info("Cross validation is enabled by kCrossValidation: {}.", kCrossValidation);
    }
    Double upSampleWeight = modelConfig.getTrain().getUpSampleWeight();
    if (Double.compare(upSampleWeight, 1d) != 0 && (modelConfig.isRegression() || (modelConfig.isClassification() && modelConfig.getTrain().isOneVsAll()))) {
        // set mean to upSampleWeight -1 and get sample + 1 to make sure no zero sample value
        LOG.info("Enable up sampling with weight {}.", upSampleWeight);
        this.upSampleRng = new PoissonDistribution(upSampleWeight - 1);
    }
    this.isContinuousEnabled = Boolean.TRUE.toString().equalsIgnoreCase(context.getProps().getProperty(CommonConstants.CONTINUOUS_TRAINING));
    this.workerThreadCount = modelConfig.getTrain().getWorkerThreadCount();
    this.threadPool = Executors.newFixedThreadPool(this.workerThreadCount);
    // enable shut down logic
    context.addCompletionCallBack(new WorkerCompletionCallBack<DTMasterParams, DTWorkerParams>() {

        @Override
        public void callback(WorkerContext<DTMasterParams, DTWorkerParams> context) {
            DTWorker.this.threadPool.shutdownNow();
            try {
                DTWorker.this.threadPool.awaitTermination(2, TimeUnit.SECONDS);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }
    });
    this.trainerId = Integer.valueOf(context.getProps().getProperty(CommonConstants.SHIFU_TRAINER_ID, "0"));
    this.isOneVsAll = modelConfig.isClassification() && modelConfig.getTrain().isOneVsAll();
    GridSearch gs = new GridSearch(modelConfig.getTrain().getParams(), modelConfig.getTrain().getGridConfigFileContent());
    Map<String, Object> validParams = this.modelConfig.getTrain().getParams();
    if (gs.hasHyperParam()) {
        validParams = gs.getParams(this.trainerId);
        LOG.info("Start grid search worker with params: {}", validParams);
    }
    this.treeNum = Integer.valueOf(validParams.get("TreeNum").toString());
    double memoryFraction = Double.valueOf(context.getProps().getProperty("guagua.data.memoryFraction", "0.6"));
    LOG.info("Max heap memory: {}, fraction: {}", Runtime.getRuntime().maxMemory(), memoryFraction);
    double validationRate = this.modelConfig.getValidSetRate();
    if (StringUtils.isNotBlank(modelConfig.getValidationDataSetRawPath())) {
        // fixed 0.6 and 0.4 of max memory for trainingData and validationData
        this.trainingData = new MemoryLimitedList<Data>((long) (Runtime.getRuntime().maxMemory() * memoryFraction * 0.6), new ArrayList<Data>());
        this.validationData = new MemoryLimitedList<Data>((long) (Runtime.getRuntime().maxMemory() * memoryFraction * 0.4), new ArrayList<Data>());
    } else {
        if (Double.compare(validationRate, 0d) != 0) {
            this.trainingData = new MemoryLimitedList<Data>((long) (Runtime.getRuntime().maxMemory() * memoryFraction * (1 - validationRate)), new ArrayList<Data>());
            this.validationData = new MemoryLimitedList<Data>((long) (Runtime.getRuntime().maxMemory() * memoryFraction * validationRate), new ArrayList<Data>());
        } else {
            this.trainingData = new MemoryLimitedList<Data>((long) (Runtime.getRuntime().maxMemory() * memoryFraction), new ArrayList<Data>());
        }
    }
    int[] inputOutputIndex = DTrainUtils.getNumericAndCategoricalInputAndOutputCounts(this.columnConfigList);
    // numerical + categorical = # of all input
    this.inputCount = inputOutputIndex[0] + inputOutputIndex[1];
    // regression outputNodeCount is 1, binaryClassfication, it is 1, OneVsAll it is 1, Native classification it is
    // 1, with index of 0,1,2,3 denotes different classes
    this.isAfterVarSelect = (inputOutputIndex[3] == 1);
    this.isManualValidation = (modelConfig.getValidationDataSetRawPath() != null && !"".equals(modelConfig.getValidationDataSetRawPath()));
    int numClasses = this.modelConfig.isClassification() ? this.modelConfig.getTags().size() : 2;
    String imStr = validParams.get("Impurity").toString();
    int minInstancesPerNode = Integer.valueOf(validParams.get("MinInstancesPerNode").toString());
    double minInfoGain = Double.valueOf(validParams.get("MinInfoGain").toString());
    if (imStr.equalsIgnoreCase("entropy")) {
        impurity = new Entropy(numClasses, minInstancesPerNode, minInfoGain);
    } else if (imStr.equalsIgnoreCase("gini")) {
        impurity = new Gini(numClasses, minInstancesPerNode, minInfoGain);
    } else if (imStr.equalsIgnoreCase("friedmanmse")) {
        impurity = new FriedmanMSE(minInstancesPerNode, minInfoGain);
    } else {
        impurity = new Variance(minInstancesPerNode, minInfoGain);
    }
    this.isRF = ALGORITHM.RF.toString().equalsIgnoreCase(modelConfig.getAlgorithm());
    this.isGBDT = ALGORITHM.GBT.toString().equalsIgnoreCase(modelConfig.getAlgorithm());
    String lossStr = validParams.get("Loss").toString();
    if (lossStr.equalsIgnoreCase("log")) {
        this.loss = new LogLoss();
    } else if (lossStr.equalsIgnoreCase("absolute")) {
        this.loss = new AbsoluteLoss();
    } else if (lossStr.equalsIgnoreCase("halfgradsquared")) {
        this.loss = new HalfGradSquaredLoss();
    } else if (lossStr.equalsIgnoreCase("squared")) {
        this.loss = new SquaredLoss();
    } else {
        try {
            this.loss = (Loss) ClassUtils.newInstance(Class.forName(lossStr));
        } catch (ClassNotFoundException e) {
            LOG.warn("Class not found for {}, using default SquaredLoss", lossStr);
            this.loss = new SquaredLoss();
        }
    }
    if (this.isGBDT) {
        this.learningRate = Double.valueOf(validParams.get(CommonConstants.LEARNING_RATE).toString());
        Object swrObj = validParams.get("GBTSampleWithReplacement");
        if (swrObj != null) {
            this.gbdtSampleWithReplacement = Boolean.TRUE.toString().equalsIgnoreCase(swrObj.toString());
        }
        Object dropoutObj = validParams.get(CommonConstants.DROPOUT_RATE);
        if (dropoutObj != null) {
            this.dropOutRate = Double.valueOf(dropoutObj.toString());
        }
    }
    this.isStratifiedSampling = this.modelConfig.getTrain().getStratifiedSample();
    this.checkpointOutput = new Path(context.getProps().getProperty(CommonConstants.SHIFU_DT_MASTER_CHECKPOINT_FOLDER, "tmp/cp_" + context.getAppId()));
    LOG.info("Worker init params:isAfterVarSel={}, treeNum={}, impurity={}, loss={}, learningRate={}, gbdtSampleWithReplacement={}, isRF={}, isGBDT={}, isStratifiedSampling={}, isKFoldCV={}, kCrossValidation={}, dropOutRate={}", isAfterVarSelect, treeNum, impurity.getClass().getName(), loss.getClass().getName(), this.learningRate, this.gbdtSampleWithReplacement, this.isRF, this.isGBDT, this.isStratifiedSampling, this.isKFoldCV, kCrossValidation, this.dropOutRate);
    // for fail over, load existing trees
    if (!context.isFirstIteration()) {
        if (this.isGBDT) {
            // set flag here and recover later in doComputing, this is to make sure recover after load part which
            // can load latest trees in #doCompute
            isNeedRecoverGBDTPredict = true;
        } else {
            // RF , trees are recovered from last master results
            recoverTrees = context.getLastMasterResult().getTrees();
        }
    }
    if (context.isFirstIteration() && this.isContinuousEnabled && this.isGBDT) {
        Path modelPath = new Path(context.getProps().getProperty(CommonConstants.GUAGUA_OUTPUT));
        TreeModel existingModel = null;
        try {
            existingModel = (TreeModel) ModelSpecLoaderUtils.loadModel(modelConfig, modelPath, ShifuFileUtils.getFileSystemBySourceType(this.modelConfig.getDataSet().getSource()));
        } catch (IOException e) {
            LOG.error("Error in get existing model, will ignore and start from scratch", e);
        }
        if (existingModel == null) {
            LOG.warn("No model is found even set to continuous model training.");
            return;
        } else {
            recoverTrees = existingModel.getTrees();
            LOG.info("Loading existing {} trees", recoverTrees.size());
        }
    }
}
Also used : PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) ColumnConfig(ml.shifu.shifu.container.obj.ColumnConfig) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) SourceType(ml.shifu.shifu.container.obj.RawSourceData.SourceType) ArrayList(java.util.ArrayList) Properties(java.util.Properties) TreeModel(ml.shifu.shifu.core.TreeModel) GuaguaRuntimeException(ml.shifu.guagua.GuaguaRuntimeException) Path(org.apache.hadoop.fs.Path) IOException(java.io.IOException) GridSearch(ml.shifu.shifu.core.dtrain.gs.GridSearch) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap)

Example 34 with PoissonDistribution

use of uk.ac.sussex.gdsc.smlm.math3.distribution.PoissonDistribution in project shifu by ShifuML.

the class DTWorker method sampleWeights.

private float[] sampleWeights(float label) {
    float[] sampleWeights = null;
    // sample negative or kFoldCV, sample rate is 1d
    double sampleRate = (modelConfig.getTrain().getSampleNegOnly() || this.isKFoldCV) ? 1d : modelConfig.getTrain().getBaggingSampleRate();
    int classValue = (int) (label + 0.01f);
    if (this.treeNum == 1 || (this.isGBDT && !this.gbdtSampleWithReplacement)) {
        // if tree == 1 or GBDT, don't use with replacement sampling; for GBDT, every time is one tree
        sampleWeights = new float[1];
        Random random = null;
        if (this.isStratifiedSampling) {
            random = baggingRandomMap.get(classValue);
            if (random == null) {
                random = DTrainUtils.generateRandomBySampleSeed(modelConfig.getTrain().getBaggingSampleSeed(), CommonConstants.NOT_CONFIGURED_BAGGING_SEED);
                baggingRandomMap.put(classValue, random);
            }
        } else {
            random = baggingRandomMap.get(0);
            if (random == null) {
                random = DTrainUtils.generateRandomBySampleSeed(modelConfig.getTrain().getBaggingSampleSeed(), CommonConstants.NOT_CONFIGURED_BAGGING_SEED);
                baggingRandomMap.put(0, random);
            }
        }
        if (random.nextDouble() <= sampleRate) {
            sampleWeights[0] = 1f;
        } else {
            sampleWeights[0] = 0f;
        }
    } else {
        // if gbdt and gbdtSampleWithReplacement = true, still sampling with replacement
        sampleWeights = new float[this.treeNum];
        if (this.isStratifiedSampling) {
            PoissonDistribution[] rng = this.baggingRngMap.get(classValue);
            if (rng == null) {
                rng = new PoissonDistribution[treeNum];
                for (int i = 0; i < treeNum; i++) {
                    rng[i] = new PoissonDistribution(sampleRate);
                }
                this.baggingRngMap.put(classValue, rng);
            }
            for (int i = 0; i < sampleWeights.length; i++) {
                sampleWeights[i] = rng[i].sample();
            }
        } else {
            PoissonDistribution[] rng = this.baggingRngMap.get(0);
            if (rng == null) {
                rng = new PoissonDistribution[treeNum];
                for (int i = 0; i < treeNum; i++) {
                    rng[i] = new PoissonDistribution(sampleRate);
                }
                this.baggingRngMap.put(0, rng);
            }
            for (int i = 0; i < sampleWeights.length; i++) {
                sampleWeights[i] = rng[i].sample();
            }
        }
    }
    return sampleWeights;
}
Also used : PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) Random(java.util.Random)

Example 35 with PoissonDistribution

use of uk.ac.sussex.gdsc.smlm.math3.distribution.PoissonDistribution in project shifu by ShifuML.

the class AbstractNNWorker method init.

@Override
public void init(WorkerContext<NNParams, NNParams> context) {
    // load props firstly
    this.props = context.getProps();
    loadConfigFiles(context.getProps());
    this.trainerId = Integer.valueOf(context.getProps().getProperty(CommonConstants.SHIFU_TRAINER_ID, "0"));
    GridSearch gs = new GridSearch(modelConfig.getTrain().getParams(), modelConfig.getTrain().getGridConfigFileContent());
    this.validParams = this.modelConfig.getTrain().getParams();
    if (gs.hasHyperParam()) {
        this.validParams = gs.getParams(trainerId);
        LOG.info("Start grid search master with params: {}", validParams);
    }
    Integer kCrossValidation = this.modelConfig.getTrain().getNumKFold();
    if (kCrossValidation != null && kCrossValidation > 0) {
        isKFoldCV = true;
        LOG.info("Cross validation is enabled by kCrossValidation: {}.", kCrossValidation);
    }
    this.poissonSampler = Boolean.TRUE.toString().equalsIgnoreCase(context.getProps().getProperty(NNConstants.NN_POISON_SAMPLER));
    this.rng = new PoissonDistribution(1.0d);
    Double upSampleWeight = modelConfig.getTrain().getUpSampleWeight();
    if (Double.compare(upSampleWeight, 1d) != 0 && (modelConfig.isRegression() || (modelConfig.isClassification() && modelConfig.getTrain().isOneVsAll()))) {
        // set mean to upSampleWeight -1 and get sample + 1to make sure no zero sample value
        LOG.info("Enable up sampling with weight {}.", upSampleWeight);
        this.upSampleRng = new PoissonDistribution(upSampleWeight - 1);
    }
    Integer epochsPerIterationInteger = this.modelConfig.getTrain().getEpochsPerIteration();
    this.epochsPerIteration = epochsPerIterationInteger == null ? 1 : epochsPerIterationInteger.intValue();
    LOG.info("epochsPerIteration in worker is :{}", epochsPerIteration);
    // Object elmObject = validParams.get(DTrainUtils.IS_ELM);
    // isELM = elmObject == null ? false : "true".equalsIgnoreCase(elmObject.toString());
    // LOG.info("Check isELM: {}", isELM);
    Object dropoutRateObj = validParams.get(CommonConstants.DROPOUT_RATE);
    if (dropoutRateObj != null) {
        this.dropoutRate = Double.valueOf(dropoutRateObj.toString());
    }
    LOG.info("'dropoutRate' in worker is :{}", this.dropoutRate);
    Object miniBatchO = validParams.get(CommonConstants.MINI_BATCH);
    if (miniBatchO != null) {
        int miniBatchs;
        try {
            miniBatchs = Integer.parseInt(miniBatchO.toString());
        } catch (Exception e) {
            miniBatchs = 1;
        }
        if (miniBatchs < 0) {
            this.batchs = 1;
        } else if (miniBatchs > 1000) {
            this.batchs = 1000;
        } else {
            this.batchs = miniBatchs;
        }
        LOG.info("'miniBatchs' in worker is : {}, batchs is {} ", miniBatchs, batchs);
    }
    int[] inputOutputIndex = DTrainUtils.getInputOutputCandidateCounts(modelConfig.getNormalizeType(), this.columnConfigList);
    this.inputNodeCount = inputOutputIndex[0] == 0 ? inputOutputIndex[2] : inputOutputIndex[0];
    // if is one vs all classification, outputNodeCount is set to 1, if classes=2, outputNodeCount is also 1
    int classes = modelConfig.getTags().size();
    this.outputNodeCount = (isLinearTarget || modelConfig.isRegression()) ? inputOutputIndex[1] : (modelConfig.getTrain().isOneVsAll() ? inputOutputIndex[1] : (classes == 2 ? 1 : classes));
    this.candidateCount = inputOutputIndex[2];
    boolean isAfterVarSelect = inputOutputIndex[0] != 0;
    LOG.info("isAfterVarSelect {}: Input count {}, output count {}, candidate count {}", isAfterVarSelect, inputNodeCount, outputNodeCount, candidateCount);
    // cache all feature list for sampling features
    this.allFeatures = NormalUtils.getAllFeatureList(columnConfigList, isAfterVarSelect);
    String subsetStr = context.getProps().getProperty(CommonConstants.SHIFU_NN_FEATURE_SUBSET);
    if (StringUtils.isBlank(subsetStr)) {
        this.subFeatures = this.allFeatures;
    } else {
        String[] splits = subsetStr.split(",");
        this.subFeatures = new ArrayList<Integer>(splits.length);
        for (String split : splits) {
            int featureIndex = Integer.parseInt(split);
            this.subFeatures.add(featureIndex);
        }
    }
    this.subFeatureSet = new HashSet<Integer>(this.subFeatures);
    LOG.info("subFeatures size is {}", subFeatures.size());
    this.featureInputsCnt = DTrainUtils.getFeatureInputsCnt(this.modelConfig, this.columnConfigList, this.subFeatureSet);
    this.wgtInit = "default";
    Object wgtInitObj = validParams.get(CommonConstants.WEIGHT_INITIALIZER);
    if (wgtInitObj != null) {
        this.wgtInit = wgtInitObj.toString();
    }
    Object lossObj = validParams.get("Loss");
    this.lossStr = lossObj != null ? lossObj.toString() : "squared";
    LOG.info("Loss str is {}", this.lossStr);
    this.isDry = Boolean.TRUE.toString().equalsIgnoreCase(context.getProps().getProperty(CommonConstants.SHIFU_DRY_DTRAIN));
    this.isSpecificValidation = (modelConfig.getValidationDataSetRawPath() != null && !"".equals(modelConfig.getValidationDataSetRawPath()));
    this.isStratifiedSampling = this.modelConfig.getTrain().getStratifiedSample();
    if (isOnDisk()) {
        LOG.info("NNWorker is loading data into disk.");
        try {
            initDiskDataSet();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // cannot find a good place to close these two data set, using Shutdown hook
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {

            @Override
            public void run() {
                ((BufferedFloatMLDataSet) (AbstractNNWorker.this.trainingData)).close();
                ((BufferedFloatMLDataSet) (AbstractNNWorker.this.validationData)).close();
            }
        }));
    } else {
        LOG.info("NNWorker is loading data into memory.");
        double memoryFraction = Double.valueOf(context.getProps().getProperty("guagua.data.memoryFraction", "0.6"));
        long memoryStoreSize = (long) (Runtime.getRuntime().maxMemory() * memoryFraction);
        LOG.info("Max heap memory: {}, fraction: {}", Runtime.getRuntime().maxMemory(), memoryFraction);
        double crossValidationRate = this.modelConfig.getValidSetRate();
        try {
            if (StringUtils.isNotBlank(modelConfig.getValidationDataSetRawPath())) {
                // fixed 0.6 and 0.4 of max memory for trainingData and validationData
                this.trainingData = new MemoryDiskFloatMLDataSet((long) (memoryStoreSize * 0.6), DTrainUtils.getTrainingFile().toString(), this.featureInputsCnt, this.outputNodeCount);
                this.validationData = new MemoryDiskFloatMLDataSet((long) (memoryStoreSize * 0.4), DTrainUtils.getTestingFile().toString(), this.featureInputsCnt, this.outputNodeCount);
            } else {
                this.trainingData = new MemoryDiskFloatMLDataSet((long) (memoryStoreSize * (1 - crossValidationRate)), DTrainUtils.getTrainingFile().toString(), this.featureInputsCnt, this.outputNodeCount);
                this.validationData = new MemoryDiskFloatMLDataSet((long) (memoryStoreSize * crossValidationRate), DTrainUtils.getTestingFile().toString(), this.featureInputsCnt, this.outputNodeCount);
            }
            // cannot find a good place to close these two data set, using Shutdown hook
            Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {

                @Override
                public void run() {
                    ((MemoryDiskFloatMLDataSet) (AbstractNNWorker.this.trainingData)).close();
                    ((MemoryDiskFloatMLDataSet) (AbstractNNWorker.this.validationData)).close();
                }
            }));
        } catch (IOException e) {
            throw new GuaguaRuntimeException(e);
        }
    }
    // create Splitter
    String delimiter = context.getProps().getProperty(Constants.SHIFU_OUTPUT_DATA_DELIMITER);
    this.splitter = MapReduceUtils.generateShifuOutputSplitter(delimiter);
}
Also used : PoissonDistribution(org.apache.commons.math3.distribution.PoissonDistribution) MemoryDiskFloatMLDataSet(ml.shifu.shifu.core.dtrain.dataset.MemoryDiskFloatMLDataSet) IOException(java.io.IOException) IOException(java.io.IOException) GuaguaRuntimeException(ml.shifu.guagua.GuaguaRuntimeException) GridSearch(ml.shifu.shifu.core.dtrain.gs.GridSearch) GuaguaRuntimeException(ml.shifu.guagua.GuaguaRuntimeException) GuaguaRuntimeException(ml.shifu.guagua.GuaguaRuntimeException) BufferedFloatMLDataSet(ml.shifu.shifu.core.dtrain.dataset.BufferedFloatMLDataSet)

Aggregations

PoissonDistribution (org.apache.commons.math3.distribution.PoissonDistribution)39 DoubleDoubleBiPredicate (uk.ac.sussex.gdsc.test.api.function.DoubleDoubleBiPredicate)7 SeededTest (uk.ac.sussex.gdsc.test.junit5.SeededTest)5 Test (org.junit.jupiter.api.Test)4 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Random (java.util.Random)3 RandomGenerator (org.apache.commons.math3.random.RandomGenerator)3 Test (org.junit.Test)3 SqlScalarFunction (com.facebook.presto.metadata.SqlScalarFunction)2 Description (com.facebook.presto.spi.function.Description)2 ScalarFunction (com.facebook.presto.spi.function.ScalarFunction)2 SqlType (com.facebook.presto.spi.function.SqlType)2 TDigest.createTDigest (com.facebook.presto.tdigest.TDigest.createTDigest)2 DecimalOperators.modulusScalarFunction (com.facebook.presto.type.DecimalOperators.modulusScalarFunction)2 Sets (com.google.cloud.dataflow.sdk.repackaged.com.google.common.collect.Sets)2 java.util (java.util)2 GuaguaRuntimeException (ml.shifu.guagua.GuaguaRuntimeException)2 GridSearch (ml.shifu.shifu.core.dtrain.gs.GridSearch)2 PoissonDistribution (uk.ac.sussex.gdsc.smlm.math3.distribution.PoissonDistribution)2