use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class JoinerTest method testIncreaseNumPartitionsInnerJoin.
/**
 * Verifies that an inner join produces the expected table when low memory forces the joiner to increase its
 * number of partitions. The result of an unconstrained reference run is compared against a run in which
 * {@code setRowsAddedBeforeOOM(10)} has been applied to the joiner.
 *
 * @throws Exception if an error occurs
 */
@Test
public final void testIncreaseNumPartitionsInnerJoin() throws Exception {
    final Joiner2Settings referenceSettings = createReferenceSettings("Data");
    final Joiner2Settings constrainedSettings = createReferenceSettings("Data");

    final BufferedDataTable left = m_exec.createBufferedDataTable(new TestData(100, 1), m_exec);
    final BufferedDataTable right = m_exec.createBufferedDataTable(new TestData(200, 1), m_exec);

    // Baseline: joiner left at whatever the reference settings give it.
    final BufferedDataTable expected =
        new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), referenceSettings)
            .computeJoinTable(left, right, m_exec);

    // Run under test: same settings, but with the rows-before-OOM threshold set to 10.
    final Joiner constrainedJoiner =
        new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), constrainedSettings);
    constrainedJoiner.setRowsAddedBeforeOOM(10);
    final BufferedDataTable actual = constrainedJoiner.computeJoinTable(left, right, m_exec);

    compareTables(expected, actual);
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class JoinerTest method testSortPartitionsInnerJoin.
/**
 * Compares an inner join on the row key computed with reference settings against the same join computed with
 * at most three open files, a rows-before-OOM threshold of 10 and the partition bits limited to the range
 * 0 (initial) to 6 (maximal).
 *
 * @throws Exception if an error occurs
 */
@Test
public void testSortPartitionsInnerJoin() throws Exception {
    final Joiner2Settings referenceSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    final Joiner2Settings constrainedSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    constrainedSettings.setMaxOpenFiles(3);

    final BufferedDataTable left = m_exec.createBufferedDataTable(new TestData(100, 1), m_exec);
    final BufferedDataTable right = m_exec.createBufferedDataTable(new TestData(200, 1), m_exec);

    // Baseline: joiner driven by the untouched reference settings.
    final BufferedDataTable expected =
        new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), referenceSettings)
            .computeJoinTable(left, right, m_exec);

    // Run under test: restricted open files plus explicit OOM threshold and partition-bit bounds.
    final Joiner constrainedJoiner =
        new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), constrainedSettings);
    constrainedJoiner.setRowsAddedBeforeOOM(10);
    constrainedJoiner.setNumBitsInitial(0);
    constrainedJoiner.setNumBitsMaximal(6);
    final BufferedDataTable actual = constrainedJoiner.computeJoinTable(left, right, m_exec);

    compareTables(expected, actual);
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class JoinerTest method testSortPartitionsFullOuterJoin.
/**
 * Compares a full outer join on the row key computed with reference settings against the same join computed
 * with at most three open files, a rows-before-OOM threshold of 10 and the partition bits limited to the
 * range 0 (initial) to 6 (maximal).
 *
 * @throws Exception if an error occurs
 */
@Test
public void testSortPartitionsFullOuterJoin() throws Exception {
    final Joiner2Settings referenceSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    referenceSettings.setJoinMode(JoinMode.FullOuterJoin);

    final Joiner2Settings constrainedSettings = createReferenceSettings(Joiner2Settings.ROW_KEY_IDENTIFIER);
    constrainedSettings.setJoinMode(JoinMode.FullOuterJoin);
    constrainedSettings.setMaxOpenFiles(3);

    final BufferedDataTable left = m_exec.createBufferedDataTable(new TestData(100, 1), m_exec);
    final BufferedDataTable right = m_exec.createBufferedDataTable(new TestData(200, 1), m_exec);

    // Baseline: full outer join with the reference settings only.
    final BufferedDataTable expected =
        new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), referenceSettings)
            .computeJoinTable(left, right, m_exec);

    // Run under test: restricted open files plus explicit OOM threshold and partition-bit bounds.
    final Joiner constrainedJoiner =
        new Joiner(left.getDataTableSpec(), right.getDataTableSpec(), constrainedSettings);
    constrainedJoiner.setRowsAddedBeforeOOM(10);
    constrainedJoiner.setNumBitsInitial(0);
    constrainedJoiner.setNumBitsMaximal(6);
    final BufferedDataTable actual = constrainedJoiner.computeJoinTable(left, right, m_exec);

    compareTables(expected, actual);
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class LogRegCoordinator method learn.
/**
 * Trains a logistic regression model on the given table. Selects the learner matching the configured solver,
 * recalculates the domain of the target and learning fields, wraps the table in a training-data container
 * (in memory or chunked, depending on the settings) and runs the learner on it.
 *
 * @param trainingData a DataTable that contains the data on which to learn the logistic regression model
 * @param exec the execution context of the corresponding KNIME node
 * @return the content of the logistic regression model
 * @throws InvalidSettingsException if the settings cause inconsistencies during training
 * @throws CanceledExecutionException if the training is canceled
 */
LogisticRegressionContent learn(final BufferedDataTable trainingData, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    // Reject empty tables and tables whose row count does not fit into an int.
    CheckUtils.checkArgument(trainingData.size() > 0, "The input table is empty. Please provide data to learn on.");
    CheckUtils.checkArgument(trainingData.size() <= Integer.MAX_VALUE, "The input table contains too many rows.");

    final LogRegLearner learner = m_settings.getSolver() == Solver.IRLS
        ? new IrlsLearner(m_settings.getMaxEpoch(), m_settings.getEpsilon(), m_settings.isCalcCovMatrix())
        : new SagLogRegLearner(m_settings);

    // Fraction of the overall progress assigned to the domain recalculation; training gets the remainder.
    final double domainProgressShare = 1.0 / (5.0 * 2.0 + 1.0);

    exec.setMessage("Analyzing categorical data");
    final BufferedDataTable domainTable =
        recalcDomainForTargetAndLearningFields(trainingData, exec.createSubExecutionContext(domainProgressShare));
    checkConstantLearningFields(domainTable);

    exec.setMessage("Building logistic regression model");
    final ExecutionMonitor trainingProgress = exec.createSubProgress(1.0 - domainProgressShare);

    final TrainingRowBuilder<ClassificationTrainingRow> rowBuilder =
        new SparseClassificationTrainingRowBuilder(domainTable, m_pmmlOutSpec,
            m_settings.getTargetReferenceCategory(), m_settings.getSortTargetCategories(),
            m_settings.getSortIncludesCategories());

    final Long randomSeed = m_settings.getSeed();
    final TrainingData<ClassificationTrainingRow> data;
    if (!m_settings.isInMemory()) {
        // NOTE(review): this path reads from the original trainingData rather than the domain-recalculated
        // table used for the in-memory path and the row builder - confirm this is intended.
        data = new DataTableTrainingData<>(trainingData, randomSeed, rowBuilder, m_settings.getChunkSize(),
            exec.createSilentSubExecutionContext(0.0));
    } else {
        data = new InMemoryData<>(domainTable, randomSeed, rowBuilder);
    }
    checkShapeCompatibility(data);

    final LogRegLearnerResult learnerResult = learner.learn(data, trainingProgress);
    final LogisticRegressionContent modelContent =
        createContentFromLearnerResult(learnerResult, rowBuilder, trainingData.getDataTableSpec());
    // Record whatever warning the learner reports (passed through as returned by the learner).
    addToWarning(learner.getWarningMessage());
    return modelContent;
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class LogRegLearnerNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * Learns the model from the table at input port 0 via a {@link LogRegCoordinator}, forwards the
 * coordinator's warning (if any) and returns the PMML model together with the coefficient statistics and
 * model statistics tables.
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable) inObjects[0];
    final DataTableSpec inputSpec = inputTable.getDataTableSpec();

    final LogRegCoordinator coordinator = new LogRegCoordinator(inputSpec, m_settings);
    m_content = coordinator.learn(inputTable, exec);

    // Only surface a warning when the coordinator actually reported one.
    final String coordinatorWarning = coordinator.getWarningMessage();
    if (coordinatorWarning != null) {
        setWarningMessage(coordinatorWarning);
    }

    // Build the PMML port object from the coordinator's first output spec and attach the regression model.
    final PMMLPortObjectSpec pmmlSpec = (PMMLPortObjectSpec) coordinator.getOutputSpecs()[0];
    final PMMLPortObject pmmlOut = new PMMLPortObject(pmmlSpec, null, inputSpec);
    final PMMLGeneralRegressionTranslator translator =
        new PMMLGeneralRegressionTranslator(m_content.createGeneralRegressionContent());
    pmmlOut.addModelTranslater(translator);

    final PortObject coefficientStatistics = m_content.createCoeffStatisticsTablePortObject(exec);
    final PortObject modelStatistics = m_content.createModelStatisticsTable(exec);
    return new PortObject[] { pmmlOut, coefficientStatistics, modelStatistics };
}
Aggregations