Search in sources:

Example 6 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class RegressionTreePMMLPredictorNodeModel method importModel.

/**
 * Reads a regression tree out of a PMML port object.
 *
 * Any warning reported by the PMML translator is forwarded via {@code setWarningMessage}.
 *
 * @param pmmlPO the PMML port object to import from
 * @return the imported model paired with a port object spec built from the translator's learn spec
 */
private Pair<RegressionTreeModel, RegressionTreeModelPortObjectSpec> importModel(final PMMLPortObject pmmlPO) {
    final RegressionTreeModelPMMLTranslator translator = new RegressionTreeModelPMMLTranslator();
    pmmlPO.initializeModelTranslator(translator);
    if (translator.hasWarning()) {
        setWarningMessage(translator.getWarning());
    }
    // build both halves of the result separately; the model is always created as an ordinary tree
    final RegressionTreeModel model = new RegressionTreeModel(translator.getTreeMetaData(), translator.getTree(), TreeType.Ordinary);
    final RegressionTreeModelPortObjectSpec modelSpec = new RegressionTreeModelPortObjectSpec(translator.getLearnSpec());
    return new Pair<>(model, modelSpec);
}
Also used : RegressionTreeModel(org.knime.base.node.mine.treeensemble2.model.RegressionTreeModel) RegressionTreeModelPortObjectSpec(org.knime.base.node.mine.treeensemble2.model.RegressionTreeModelPortObjectSpec) RegressionTreeModelPMMLTranslator(org.knime.base.node.mine.treeensemble2.model.pmml.RegressionTreeModelPMMLTranslator) Pair(org.knime.core.util.Pair)

Example 7 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class DataCellToJavaConverterRegistry method parseAnnotations.

/*
 * Walks the value classes of every available DataType, looks up the methods annotated with
 * @DataValueAccessMethod and passes each (method, annotation) pair to parseAnnotation(...).
 * Every value class is handled at most once.
 */
private void parseAnnotations() {
    final Set<Class<? extends DataValue>> seenValueClasses = new HashSet<>();
    for (final DataType type : DataTypeRegistry.getInstance().availableDataTypes()) {
        for (final Class<? extends DataValue> valueClass : type.getValueClasses()) {
            if (!seenValueClasses.contains(valueClass)) {
                // collect the methods of this value class that carry a DataValueAccessMethod annotation
                final Collection<Pair<Method, DataValueAccessMethod>> annotatedMethods = ClassUtil.getMethodsWithAnnotation(valueClass, DataValueAccessMethod.class);
                for (final Pair<Method, DataValueAccessMethod> methodAndAnnotation : annotatedMethods) {
                    parseAnnotation(valueClass, methodAndAnnotation.getFirst(), methodAndAnnotation.getSecond());
                }
                // remember the class so it is skipped when another data type lists it again
                seenValueClasses.add(valueClass);
            }
        }
    }
}
Also used : CollectionDataValue(org.knime.core.data.collection.CollectionDataValue) DataValue(org.knime.core.data.DataValue) DataValueAccessMethod(org.knime.core.data.convert.DataValueAccessMethod) DataType(org.knime.core.data.DataType) Method(java.lang.reflect.Method) DataValueAccessMethod(org.knime.core.data.convert.DataValueAccessMethod) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) Pair(org.knime.core.util.Pair)

Example 8 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class AppendedRowsRowInput method create.

/**
 * Creates a row input that iterates over all given inputs one after another. Duplicate row keys are treated as
 * dictated by the policy argument. Columns that exist in only some of the inputs, or that conflict between
 * inputs, are handled as described in {@link AppendedRowsTable}.
 *
 * @param ins the inputs to append (must be non-null and must not contain null elements)
 * @param duplPolicy the strategy for duplicate keys; must not be null
 * @param suffix the suffix appended to duplicate keys (must not be empty if the policy is
 *            {@link DuplicatePolicy#AppendSuffix})
 * @param exec (optional) execution monitor used for progress reporting and cancelation checks; may be null
 * @param totalRowCount the expected number of rows (sum of the row counts of all inputs); only used for
 *            progress reporting -- a negative value disables progress
 * @return a new row input whose iteration scans all argument inputs
 */
public static AppendedRowsRowInput create(final RowInput[] ins, final DuplicatePolicy duplPolicy, final String suffix, final ExecutionMonitor exec, final long totalRowCount) {
    final DataTableSpec[] inputSpecs = new DataTableSpec[ins.length];
    for (int idx = 0; idx < inputSpecs.length; idx++) {
        inputSpecs[idx] = ins[idx].getDataTableSpec();
    }
    final DataTableSpec mergedSpec = AppendedRowsTable.generateDataTableSpec(inputSpecs);

    // argument validation
    CheckUtils.checkArgumentNotNull(duplPolicy, "Arg must not be null");
    if (DuplicatePolicy.AppendSuffix.equals(duplPolicy)) {
        final boolean hasSuffix = StringUtils.isNotEmpty(suffix);
        CheckUtils.checkArgument(hasSuffix, "Suffix must not be an empty string.");
    }

    // one supplier per input, each wrapping the input's row iterator together with its spec
    final PairSupplier[] rowSources = new PairSupplier[ins.length];
    for (int idx = 0; idx < rowSources.length; idx++) {
        final RowIterator rowIterator = new RowInputIterator(ins[idx]);
        final DataTableSpec rowSpec = ins[idx].getDataTableSpec();
        rowSources[idx] = new PairSupplier(new Pair<RowIterator, DataTableSpec>(rowIterator, rowSpec));
    }

    final AppendedRowsIterator appendedIterator = new AppendedRowsIterator(rowSources, duplPolicy, suffix, mergedSpec, exec, totalRowCount);
    return new AppendedRowsRowInput(mergedSpec, appendedIterator);
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PairSupplier(org.knime.core.data.append.AppendedRowsIterator.PairSupplier) Pair(org.knime.core.util.Pair)

Example 9 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class LKGradientBoostedTreesLearner method learn.

/**
 * {@inheritDoc}
 *
 * Learns {@code getConfig().getNrModels()} boosting levels. On every level one regression tree per class is
 * trained via the global KNIME thread pool, and the resulting trees and coefficient maps are collected into a
 * {@link MultiClassGradientBoostedTreesModel}.
 *
 * @param exec execution monitor used for status messages, progress and sub-progress creation
 * @return the multi-class gradient boosted trees model assembled from all learned trees
 * @throws CanceledExecutionException declared by this method; NOTE(review): presumably raised when the
 *             execution is canceled -- the raising code is not visible in this method, confirm
 * @throws ExecutionException if retrieving the result of a tree learning task via {@code Future#get()} fails
 * @throws InterruptedException if the thread is interrupted while acquiring a semaphore permit or while waiting
 *             for a tree learning task
 */
@Override
public MultiClassGradientBoostedTreesModel learn(final ExecutionMonitor exec) throws CanceledExecutionException, InterruptedException, ExecutionException {
    final TreeData data = getData();
    final TreeTargetNominalColumnData target = (TreeTargetNominalColumnData) data.getTargetColumn();
    final NominalValueRepresentation[] classNomVals = target.getMetaData().getValues();
    final int numClasses = classNomVals.length;
    final String[] classLabels = new String[numClasses];
    final int nrModels = getConfig().getNrModels();
    final int nrRows = target.getNrRows();
    // models[level][class]: the regression tree learned for a class on a boosting level
    final TreeModelRegression[][] models = new TreeModelRegression[nrModels][numClasses];
    // one entry per level, each holding one coefficient map per class
    final ArrayList<ArrayList<Map<TreeNodeSignature, Double>>> coefficientMaps = new ArrayList<ArrayList<Map<TreeNodeSignature, Double>>>(nrModels);
    // variables for parallelization
    final ThreadPool tp = KNIMEConstants.GLOBAL_THREAD_POOL;
    // shared slot for a failure; NOTE(review): presumably filled by the callables and inspected by
    // checkThrowable(...) -- neither is visible here, confirm
    final AtomicReference<Throwable> learnThrowableRef = new AtomicReference<Throwable>();
    // number of semaphore permits per level: 1.5 times the available processors (integer arithmetic)
    final int procCount = 3 * Runtime.getRuntime().availableProcessors() / 2;
    exec.setMessage("Transforming problem");
    // transform the original k class classification problem into k regression problems
    final TreeData[] actual = new TreeData[numClasses];
    for (int i = 0; i < numClasses; i++) {
        final double[] newTarget = calculateNewTarget(target, i);
        actual[i] = createNumericDataFromArray(newTarget);
        classLabels[i] = classNomVals[i].getNominalValue();
    }
    final RandomData rd = getConfig().createRandomData();
    // previousFunctions[class][row]; starts out as all zeros (Java array default)
    final double[][] previousFunctions = new double[numClasses][nrRows];
    TreeNodeSignatureFactory signatureFactory = null;
    final int maxLevels = getConfig().getMaxLevels();
    if (maxLevels < TreeEnsembleLearnerConfiguration.MAX_LEVEL_INFINITE) {
        // bounded tree depth: size the signature factory with 2^(maxLevels - 1)
        int capacity = IntMath.pow(2, maxLevels - 1);
        signatureFactory = new TreeNodeSignatureFactory(capacity);
    } else {
        signatureFactory = new TreeNodeSignatureFactory();
    }
    exec.setMessage("Learn trees");
    for (int i = 0; i < nrModels; i++) {
        // a fresh semaphore with procCount permits is used for every level
        final Semaphore semaphore = new Semaphore(procCount);
        final ArrayList<Map<TreeNodeSignature, Double>> classCoefficientMaps = new ArrayList<Map<TreeNodeSignature, Double>>(numClasses);
        // prepare calculation of pseudoResiduals
        // probs[j][r] = exp(F_j(r)) / sum_k exp(F_k(r)), i.e. the previous function values normalized per row
        final double[][] probs = new double[numClasses][nrRows];
        for (int r = 0; r < nrRows; r++) {
            double sumExpF = 0;
            for (int j = 0; j < numClasses; j++) {
                sumExpF += Math.exp(previousFunctions[j][r]);
            }
            for (int j = 0; j < numClasses; j++) {
                probs[j][r] = Math.exp(previousFunctions[j][r]) / sumExpF;
            }
        }
        // one future per class; each is later cast to a Pair of tree and coefficient map
        final Future<?>[] treeCoefficientMapPairs = new Future<?>[numClasses];
        for (int j = 0; j < numClasses; j++) {
            checkThrowable(learnThrowableRef);
            // every task gets its own random data, seeded from the shared generator
            final RandomData rdSingle = TreeEnsembleLearnerConfiguration.createRandomData(rd.nextLong(Long.MIN_VALUE, Long.MAX_VALUE));
            final ExecutionMonitor subExec = exec.createSubProgress(0.0);
            // take a permit before enqueueing; NOTE(review): presumably the callable releases it when it
            // finishes (it receives the semaphore) -- confirm in TreeLearnerCallable
            semaphore.acquire();
            treeCoefficientMapPairs[j] = tp.enqueue(new TreeLearnerCallable(rdSingle, probs[j], actual[j], subExec, numClasses, previousFunctions[j], semaphore, learnThrowableRef, signatureFactory));
        }
        // collect the results in class order
        for (int j = 0; j < numClasses; j++) {
            checkThrowable(learnThrowableRef);
            // a permit is held while the result is fetched and returned right afterwards
            semaphore.acquire();
            // unchecked cast: the future is declared as Future<?>
            final Pair<TreeModelRegression, Map<TreeNodeSignature, Double>> pair = (Pair<TreeModelRegression, Map<TreeNodeSignature, Double>>) treeCoefficientMapPairs[j].get();
            models[i][j] = pair.getFirst();
            classCoefficientMaps.add(pair.getSecond());
            semaphore.release();
        }
        checkThrowable(learnThrowableRef);
        coefficientMaps.add(classCoefficientMaps);
        // NOTE(review): progress and message use the zero-based index i, so the last level reports
        // (nrModels - 1) / nrModels -- confirm whether i + 1 was intended
        exec.setProgress((double) i / nrModels, "Finished level " + i + "/" + nrModels);
    }
    return MultiClassGradientBoostedTreesModel.createMultiClassGradientBoostedTreesModel(getConfig(), data.getMetaData(), models, data.getTreeType(), 0, numClasses, coefficientMaps, classLabels);
}
Also used : RandomData(org.apache.commons.math.random.RandomData) ArrayList(java.util.ArrayList) ThreadPool(org.knime.core.util.ThreadPool) NominalValueRepresentation(org.knime.base.node.mine.treeensemble2.data.NominalValueRepresentation) Semaphore(java.util.concurrent.Semaphore) TreeNodeSignature(org.knime.base.node.mine.treeensemble2.model.TreeNodeSignature) TreeModelRegression(org.knime.base.node.mine.treeensemble2.model.TreeModelRegression) ExecutionMonitor(org.knime.core.node.ExecutionMonitor) TreeNodeSignatureFactory(org.knime.base.node.mine.treeensemble2.learner.TreeNodeSignatureFactory) Pair(org.knime.core.util.Pair) AtomicReference(java.util.concurrent.atomic.AtomicReference) Future(java.util.concurrent.Future) TreeData(org.knime.base.node.mine.treeensemble2.data.TreeData) HashMap(java.util.HashMap) Map(java.util.Map) TreeTargetNominalColumnData(org.knime.base.node.mine.treeensemble2.data.TreeTargetNominalColumnData)

Example 10 with Pair

use of org.knime.core.util.Pair in project knime-core by knime.

the class TreeNominalColumnDataTest method createPCATestData.

/**
 * Builds a nominal attribute column ("test-col") with the values A-E and a nominal target column
 * ("target-col") with the classes T1-T3. For every (attribute value, class) combination a fixed number of rows
 * is generated, with consecutive row keys starting at 0.
 *
 * @param config the learner configuration passed on when the attribute column data is created
 * @return the attribute column data (attribute index set to 0) paired with the target column data
 */
private static Pair<TreeNominalColumnData, TreeTargetNominalColumnData> createPCATestData(final TreeEnsembleLearnerConfiguration config) {
    final DataColumnSpec attributeSpec = new DataColumnSpecCreator("test-col", StringCell.TYPE).createSpec();
    final String[] attributeValues = { "A", "B", "C", "D", "E" };
    final String[] classLabels = { "T1", "T2", "T3" };
    final TreeNominalColumnDataCreator attributeCreator = new TreeNominalColumnDataCreator(attributeSpec);

    // the target column's domain consists of the distinct class labels
    final DataColumnSpecCreator targetSpecCreator = new DataColumnSpecCreator("target-col", StringCell.TYPE);
    final StringCell[] domainCells = Arrays.stream(classLabels).distinct().map(label -> new StringCell(label)).toArray(size -> new StringCell[size]);
    targetSpecCreator.setDomain(new DataColumnDomainCreator(domainCells).createDomain());
    final DataColumnSpec targetColumnSpec = targetSpecCreator.createSpec();
    final TreeTargetColumnDataCreator targetCreator = new TreeTargetNominalColumnDataCreator(targetColumnSpec);

    // rowCounts[a][c]: number of rows with attribute value a and class c
    final int[][] rowCounts = {
        { 40, 10, 10 },
        { 10, 40, 10 },
        { 20, 30, 10 },
        { 20, 15, 25 },
        { 10, 5, 45 }
    };
    long nextRowIndex = 0;
    for (int a = 0; a < attributeValues.length; a++) {
        final int[] countsForAttribute = rowCounts[a];
        for (int c = 0; c < classLabels.length; c++) {
            final int repetitions = countsForAttribute[c];
            for (int n = 0; n < repetitions; n++) {
                final RowKey rowKey = RowKey.createRowKey(nextRowIndex++);
                attributeCreator.add(rowKey, new StringCell(attributeValues[a]));
                targetCreator.add(rowKey, new StringCell(classLabels[c]));
            }
        }
    }

    final TreeNominalColumnData attributeData = attributeCreator.createColumnData(0, config);
    attributeData.getMetaData().setAttributeIndex(0);
    final TreeTargetNominalColumnData targetData = (TreeTargetNominalColumnData) targetCreator.createColumnData();
    return Pair.create(attributeData, targetData);
}
Also used : Arrays(java.util.Arrays) RandomData(org.apache.commons.math.random.RandomData) RowKey(org.knime.core.data.RowKey) IsInstanceOf.instanceOf(org.hamcrest.core.IsInstanceOf.instanceOf) InvalidSettingsException(org.knime.core.node.InvalidSettingsException) SplitCriterion(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.SplitCriterion) DataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.DataMemberships) TreeNodeNominalCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalCondition) Pair(org.knime.core.util.Pair) Assert.assertThat(org.junit.Assert.assertThat) ColumnSamplingMode(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.ColumnSamplingMode) TreeEnsembleLearnerConfiguration(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration) DataColumnSpec(org.knime.core.data.DataColumnSpec) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) Assert.assertArrayEquals(org.junit.Assert.assertArrayEquals) NominalMultiwaySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalMultiwaySplitCandidate) SetLogic(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition.SetLogic) NominalBinarySplitCandidate(org.knime.base.node.mine.treeensemble2.learner.NominalBinarySplitCandidate) BigInteger(java.math.BigInteger) TreeNodeNominalBinaryCondition(org.knime.base.node.mine.treeensemble2.model.TreeNodeNominalBinaryCondition) SplitCandidate(org.knime.base.node.mine.treeensemble2.learner.SplitCandidate) TreeType(org.knime.base.node.mine.treeensemble2.model.AbstractTreeEnsembleModel.TreeType) Assert.assertNotNull(org.junit.Assert.assertNotNull) IDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.IDataIndexManager) RootDataMemberships(org.knime.base.node.mine.treeensemble2.data.memberships.RootDataMemberships) 
Assert.assertTrue(org.junit.Assert.assertTrue) Test(org.junit.Test) DoubleCell(org.knime.core.data.def.DoubleCell) DefaultDataIndexManager(org.knime.base.node.mine.treeensemble2.data.memberships.DefaultDataIndexManager) Assert.assertNull(org.junit.Assert.assertNull) Assert.assertFalse(org.junit.Assert.assertFalse) StringCell(org.knime.core.data.def.StringCell) BitSet(java.util.BitSet) MissingValueHandling(org.knime.base.node.mine.treeensemble2.node.learner.TreeEnsembleLearnerConfiguration.MissingValueHandling) Assert.assertEquals(org.junit.Assert.assertEquals) DataColumnSpecCreator(org.knime.core.data.DataColumnSpecCreator) RowKey(org.knime.core.data.RowKey) DataColumnDomainCreator(org.knime.core.data.DataColumnDomainCreator) DataColumnSpec(org.knime.core.data.DataColumnSpec) StringCell(org.knime.core.data.def.StringCell)

Aggregations

Pair (org.knime.core.util.Pair) 54, ArrayList (java.util.ArrayList) 17, DataCell (org.knime.core.data.DataCell) 14, DataType (org.knime.core.data.DataType) 13, InvalidSettingsException (org.knime.core.node.InvalidSettingsException) 13, PortType (org.knime.core.node.port.PortType) 13, LinkedHashMap (java.util.LinkedHashMap) 11, Map (java.util.Map) 10, DataColumnSpec (org.knime.core.data.DataColumnSpec) 10, HashMap (java.util.HashMap) 9, HashSet (java.util.HashSet) 9, DataTableSpec (org.knime.core.data.DataTableSpec) 9, FlowVariable (org.knime.core.node.workflow.FlowVariable) 9, DataRow (org.knime.core.data.DataRow) 8, StringCell (org.knime.core.data.def.StringCell) 7, SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString) 7, DataColumnSpecCreator (org.knime.core.data.DataColumnSpecCreator) 6, ColumnRearranger (org.knime.core.data.container.ColumnRearranger) 6, DefaultRow (org.knime.core.data.def.DefaultRow) 6, DoubleCell (org.knime.core.data.def.DoubleCell) 6