Search in sources :

Example 91 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class JoinerTest method testIncreaseNumPartitionsInnerJoin.

/**
 * Verifies that an inner join yields the expected result when the number of partitions has to be increased
 * because of low memory. The output of a joiner with a lowered rows-before-OOM threshold is compared against
 * the output of an unmodified reference joiner.
 *
 * @throws Exception if an error occurs
 */
@Test
public final void testIncreaseNumPartitionsInnerJoin() throws Exception {
    // both runs share the same join settings; only the joiner instance under test is tweaked below
    final Joiner2Settings referenceSettings = createReferenceSettings("Data");
    final Joiner2Settings testSettings = createReferenceSettings("Data");

    final BufferedDataTable left = m_exec.createBufferedDataTable(new TestData(100, 1), m_exec);
    final BufferedDataTable right = m_exec.createBufferedDataTable(new TestData(200, 1), m_exec);
    final DataTableSpec leftSpec = left.getDataTableSpec();
    final DataTableSpec rightSpec = right.getDataTableSpec();

    // expected result: joiner running with reference settings
    final BufferedDataTable expected =
        new Joiner(leftSpec, rightSpec, referenceSettings).computeJoinTable(left, right, m_exec);

    // actual result: joiner that reports low memory early (presumably after 10 added rows -- TODO confirm)
    final Joiner joinerUnderTest = new Joiner(leftSpec, rightSpec, testSettings);
    joinerUnderTest.setRowsAddedBeforeOOM(10);
    final BufferedDataTable actual = joinerUnderTest.computeJoinTable(left, right, m_exec);

    compareTables(expected, actual);
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable) Test(org.junit.Test)

Example 92 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class JoinerTest method testSortPartitionsInnerJoin.

/**
 * Compares an inner join on the row key computed with reference settings against the same join computed by a
 * joiner restricted to three open files, with a rows-before-OOM threshold of 10 and the initial/maximal
 * number of bits set to 0 and 6.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testSortPartitionsInnerJoin() throws Exception {
    final Joiner2Settings referenceSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    final Joiner2Settings testSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    testSettings.setMaxOpenFiles(3);

    final BufferedDataTable left = m_exec.createBufferedDataTable(new TestData(100, 1), m_exec);
    final BufferedDataTable right = m_exec.createBufferedDataTable(new TestData(200, 1), m_exec);
    final DataTableSpec leftSpec = left.getDataTableSpec();
    final DataTableSpec rightSpec = right.getDataTableSpec();

    // expected result: joiner running with reference settings
    final BufferedDataTable expected =
        new Joiner(leftSpec, rightSpec, referenceSettings).computeJoinTable(left, right, m_exec);

    // actual result: joiner with constrained resources
    final Joiner joinerUnderTest = new Joiner(leftSpec, rightSpec, testSettings);
    joinerUnderTest.setRowsAddedBeforeOOM(10);
    joinerUnderTest.setNumBitsInitial(0);
    joinerUnderTest.setNumBitsMaximal(6);
    final BufferedDataTable actual = joinerUnderTest.computeJoinTable(left, right, m_exec);

    compareTables(expected, actual);
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable) Test(org.junit.Test)

Example 93 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class JoinerTest method testSortPartitionsFullOuterJoin.

/**
 * Compares a full outer join on the row key computed with reference settings against the same join computed
 * by a joiner restricted to three open files, with a rows-before-OOM threshold of 10 and the initial/maximal
 * number of bits set to 0 and 6.
 *
 * @throws Exception if an error occurs
 */
@Test
public void testSortPartitionsFullOuterJoin() throws Exception {
    final Joiner2Settings referenceSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    referenceSettings.setJoinMode(JoinMode.FullOuterJoin);
    final Joiner2Settings testSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    testSettings.setJoinMode(JoinMode.FullOuterJoin);
    testSettings.setMaxOpenFiles(3);

    final BufferedDataTable left = m_exec.createBufferedDataTable(new TestData(100, 1), m_exec);
    final BufferedDataTable right = m_exec.createBufferedDataTable(new TestData(200, 1), m_exec);
    final DataTableSpec leftSpec = left.getDataTableSpec();
    final DataTableSpec rightSpec = right.getDataTableSpec();

    // expected result: joiner running with reference settings
    final BufferedDataTable expected =
        new Joiner(leftSpec, rightSpec, referenceSettings).computeJoinTable(left, right, m_exec);

    // actual result: joiner with constrained resources
    final Joiner joinerUnderTest = new Joiner(leftSpec, rightSpec, testSettings);
    joinerUnderTest.setRowsAddedBeforeOOM(10);
    joinerUnderTest.setNumBitsInitial(0);
    joinerUnderTest.setNumBitsMaximal(6);
    final BufferedDataTable actual = joinerUnderTest.computeJoinTable(left, right, m_exec);

    compareTables(expected, actual);
}
Also used : BufferedDataTable(org.knime.core.node.BufferedDataTable) Test(org.junit.Test)

Example 94 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class LogRegCoordinator method learn.

/**
 * Runs the complete training: picks the learner matching the configured solver, recalculates the domain of
 * the relevant columns, wraps the data for training and converts the learner result into the model content.
 *
 * @param trainingData a DataTable that contains the data on which to learn the logistic regression model
 * @param exec the execution context of the corresponding KNIME node
 * @return the content of the logistic regression model
 * @throws InvalidSettingsException if the settings cause inconsistencies during training
 * @throws CanceledExecutionException if the training is canceled
 */
LogisticRegressionContent learn(final BufferedDataTable trainingData, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final long rowCount = trainingData.size();
    CheckUtils.checkArgument(rowCount > 0, "The input table is empty. Please provide data to learn on.");
    CheckUtils.checkArgument(rowCount <= Integer.MAX_VALUE, "The input table contains too many rows.");

    // IRLS gets its own learner; every other solver is handled by the SAG learner
    final LogRegLearner learner = m_settings.getSolver() == Solver.IRLS
        ? new IrlsLearner(m_settings.getMaxEpoch(), m_settings.getEpsilon(), m_settings.isCalcCovMatrix())
        : new SagLogRegLearner(m_settings);

    // share of the overall progress that is assigned to the domain recalculation
    final double domainProgressShare = 1.0 / (5.0 * 2.0 + 1.0);

    exec.setMessage("Analyzing categorical data");
    final ExecutionContext domainExec = exec.createSubExecutionContext(domainProgressShare);
    final BufferedDataTable dataTable = recalcDomainForTargetAndLearningFields(trainingData, domainExec);
    checkConstantLearningFields(dataTable);

    exec.setMessage("Building logistic regression model");
    final ExecutionMonitor trainExec = exec.createSubProgress(1.0 - domainProgressShare);
    final TrainingRowBuilder<ClassificationTrainingRow> rowBuilder = new SparseClassificationTrainingRowBuilder(dataTable, m_pmmlOutSpec, m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(), m_settings.getSortIncludesCategories());

    final Long seed = m_settings.getSeed();
    final TrainingData<ClassificationTrainingRow> data;
    if (!m_settings.isInMemory()) {
        // NOTE(review): this branch reads from the original trainingData while the in-memory branch uses the
        // domain-recalculated dataTable -- confirm that this difference is intended
        data = new DataTableTrainingData<ClassificationTrainingRow>(trainingData, seed, rowBuilder, m_settings.getChunkSize(), exec.createSilentSubExecutionContext(0.0));
    } else {
        data = new InMemoryData<ClassificationTrainingRow>(dataTable, seed, rowBuilder);
    }
    checkShapeCompatibility(data);

    final LogRegLearnerResult result = learner.learn(data, trainExec);
    final LogisticRegressionContent content = createContentFromLearnerResult(result, rowBuilder, trainingData.getDataTableSpec());
    // forward whatever warning the learner produced
    addToWarning(learner.getWarningMessage());
    return content;
}
Also used : ClassificationTrainingRow(org.knime.base.node.mine.regression.logistic.learner4.data.ClassificationTrainingRow) SparseClassificationTrainingRowBuilder(org.knime.base.node.mine.regression.logistic.learner4.data.SparseClassificationTrainingRowBuilder) SagLogRegLearner(org.knime.base.node.mine.regression.logistic.learner4.sg.SagLogRegLearner) BufferedDataTable(org.knime.core.node.BufferedDataTable) ExecutionMonitor(org.knime.core.node.ExecutionMonitor)

Example 95 with BufferedDataTable

use of org.knime.core.node.BufferedDataTable in project knime-core by knime.

the class LogRegLearnerNodeModel method execute.

/**
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inObjects[0];
    final DataTableSpec inputSpec = inputTable.getDataTableSpec();

    // delegate the training to the coordinator and keep the learned content in the node model
    final LogRegCoordinator coordinator = new LogRegCoordinator(inputSpec, m_settings);
    m_content = coordinator.learn(inputTable, exec);

    // only forward a warning if the coordinator actually reported one
    final String warning = coordinator.getWarningMessage();
    if (warning != null) {
        setWarningMessage(warning);
    }

    // first output: PMML port carrying the general regression model
    final PMMLPortObjectSpec pmmlSpec = (PMMLPortObjectSpec) coordinator.getOutputSpecs()[0];
    final PMMLPortObject pmmlPort = new PMMLPortObject(pmmlSpec, null, inputSpec);
    pmmlPort.addModelTranslater(new PMMLGeneralRegressionTranslator(m_content.createGeneralRegressionContent()));

    // second and third output: coefficient statistics and model statistics
    final PortObject coefficientStatistics = m_content.createCoeffStatisticsTablePortObject(exec);
    final PortObject modelStatistics = m_content.createModelStatisticsTable(exec);
    return new PortObject[] { pmmlPort, coefficientStatistics, modelStatistics };
}
Also used : DataTableSpec(org.knime.core.data.DataTableSpec) PMMLPortObject(org.knime.core.node.port.pmml.PMMLPortObject) PMMLGeneralRegressionTranslator(org.knime.base.node.mine.regression.pmmlgreg.PMMLGeneralRegressionTranslator) BufferedDataTable(org.knime.core.node.BufferedDataTable) PortObject(org.knime.core.node.port.PortObject)

Aggregations

BufferedDataTable (org.knime.core.node.BufferedDataTable): 425, DataTableSpec (org.knime.core.data.DataTableSpec): 213, ColumnRearranger (org.knime.core.data.container.ColumnRearranger): 148, DataRow (org.knime.core.data.DataRow): 118, BufferedDataContainer (org.knime.core.node.BufferedDataContainer): 97, PortObject (org.knime.core.node.port.PortObject): 96, DataCell (org.knime.core.data.DataCell): 85, DataColumnSpec (org.knime.core.data.DataColumnSpec): 61, InvalidSettingsException (org.knime.core.node.InvalidSettingsException): 60, DefaultRow (org.knime.core.data.def.DefaultRow): 56, PMMLPortObject (org.knime.core.node.port.pmml.PMMLPortObject): 54, RowKey (org.knime.core.data.RowKey): 52, ExecutionMonitor (org.knime.core.node.ExecutionMonitor): 50, CanceledExecutionException (org.knime.core.node.CanceledExecutionException): 47, SettingsModelString (org.knime.core.node.defaultnodesettings.SettingsModelString): 43, IOException (java.io.IOException): 41, ExecutionContext (org.knime.core.node.ExecutionContext): 40, ArrayList (java.util.ArrayList): 33, LinkedHashMap (java.util.LinkedHashMap): 31, DoubleValue (org.knime.core.data.DoubleValue): 29