use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class LogRegLearner method execute.
/**
 * Trains the logistic regression model on the table supplied at the first input port.
 *
 * <p>
 * The work is done in two phases: the domain of the target and learning columns is recalculated first, then the
 * learner is run on the resulting table. Progress is split between both phases accordingly.
 *
 * @param portObjects The input objects; the element at index 0 is cast to {@link BufferedDataTable}.
 * @param exec the execution context used for messages, progress and sub contexts
 * @return a {@link LogisticRegressionContent} storing computed data
 * @throws Exception if computation of the logistic regression model is not successful or if given data is
 *             inconsistent with the settings defined in the constructor.
 * @see LogRegLearnerNodeModel#execute(PortObject[], ExecutionContext)
 */
public LogisticRegressionContent execute(final PortObject[] portObjects, final ExecutionContext exec)
    throws Exception {
    final BufferedDataTable inputTable = (BufferedDataTable)portObjects[0];
    init(inputTable.getDataTableSpec(), Collections.<String>emptySet());

    // Progress budget: the learner typically needs five steps with two runs over the data each, the domain
    // calculation needs one more run - so the domain phase gets 1 of 11 parts.
    final double domainFraction = 1.0 / (5.0 * 2.0 + 1.0);

    exec.setMessage("Analyzing categorical data");
    final ExecutionContext domainExec = exec.createSubExecutionContext(domainFraction);
    final BufferedDataTable trainingTable = recalcDomainForTargetAndLearningFields(inputTable, domainExec);
    checkConstantLearningFields(trainingTable);

    exec.setMessage("Building logistic regression model");
    final ExecutionContext learnerExec = exec.createSubExecutionContext(1.0 - domainFraction);
    return m_learner.perform(trainingTable, learnerExec);
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class LogRegLearner method recalcDomainForTargetAndLearningFields.
/**
 * Recalculates the domain of the target column and of the learning columns and returns the input table with the
 * recalculated spec attached. As a side effect the learner is re-initialized via {@code init(...)} with the new
 * spec and the set of nominal learning columns that ended up without possible values; a warning naming those
 * columns is logged and appended to {@code m_warningMessage}.
 *
 * @param data the input table
 * @param exec execution context used for the domain calculation and for creating the spec replacer table
 * @return the input table wrapped with the recalculated {@link DataTableSpec}
 * @throws InvalidSettingsException presumably raised by {@code CheckUtils.checkSetting} when the target column
 *             has no possible values after the domain calculation - confirm against CheckUtils
 * @throws CanceledExecutionException if the execution is canceled
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data,
    final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetCol = m_pmmlOutSpec.getTargetFields().get(0);

    // first selection: the target column and every nominal learning field; never drops an existing domain
    final DomainCreatorColumnSelection nominalSelection = new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            final String name = colSpec.getName();
            if (name.equals(targetCol)) {
                return true;
            }
            return colSpec.getType().isCompatible(NominalValue.class)
                && m_pmmlOutSpec.getLearningFields().contains(name);
        }
    };

    // second selection: numeric learning fields; their domain is dropped and created anew
    final DomainCreatorColumnSelection numericSelection = new DomainCreatorColumnSelection() {
        private boolean isNumericLearningField(final DataColumnSpec colSpec) {
            return colSpec.getType().isCompatible(DoubleValue.class)
                && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns
            return isNumericLearningField(colSpec);
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return isNumericLearningField(colSpec);
        }
    };

    final DataTableDomainCreator domainCreator =
        new DataTableDomainCreator(data.getDataTableSpec(), nominalSelection, numericSelection);
    domainCreator.updateDomain(data, exec);
    final DataTableSpec updatedSpec = domainCreator.createSpec();

    final boolean targetHasValues = updatedSpec.getColumnSpec(targetCol).getDomain().hasValues();
    CheckUtils.checkSetting(targetHasValues, "Target column '%s' has too many"
        + " unique values - consider to use domain calucator node before to enforce calculation", targetCol);

    final BufferedDataTable respecTable = exec.createSpecReplacerTable(data, updatedSpec);

    // bug fix 5580 - ignore columns with too many different values
    final Set<String> ignoredColumns = new LinkedHashSet<>();
    for (final String learningField : m_pmmlOutSpec.getLearningFields()) {
        final DataColumnSpec learningSpec = updatedSpec.getColumnSpec(learningField);
        final boolean isNominal = learningSpec.getType().isCompatible(NominalValue.class);
        if (isNominal && !learningSpec.getDomain().hasValues()) {
            ignoredColumns.add(learningField);
        }
    }

    if (!ignoredColumns.isEmpty()) {
        final boolean single = ignoredColumns.size() == 1;
        final String warning = (single ? "Column " : "Columns ")
            + ConvenienceMethods.getShortStringFrom(ignoredColumns, 5)
            + (single ? " has " : " have ")
            + "too many different values - will be ignored during training "
            + "(enforce inclusion by using a domain calculator node before)";
        LOGGER.warn(warning);
        // append to an already present warning on a new line, otherwise start a fresh one
        final String previous = m_warningMessage == null ? "" : m_warningMessage + "\n";
        m_warningMessage = previous + warning;
    }

    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(respecTable.getDataTableSpec(), ignoredColumns);
    return respecTable;
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class RegressionPredictorNodeModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>
 * Expects the PMML model at input index 0 and the data table at input index 1; returns a single table created by
 * applying the column rearranger built from the model to the data table.
 */
@Override
public PortObject[] execute(final PortObject[] inData, final ExecutionContext exec) throws Exception {
    final PMMLPortObject pmmlModel = (PMMLPortObject)inData[0];

    // fail early when the PMML document carries no regression model
    if (pmmlModel.getPMMLValue().getModels(PMMLModelType.RegressionModel).isEmpty()) {
        final String msg = "No Regression Model found.";
        LOGGER.error(msg);
        throw new RuntimeException(msg);
    }

    final PMMLRegressionTranslator translator = new PMMLRegressionTranslator();
    pmmlModel.initializeModelTranslator(translator);

    final BufferedDataTable inputTable = (BufferedDataTable)inData[1];
    final ColumnRearranger rearranger =
        createRearranger(inputTable.getDataTableSpec(), pmmlModel.getSpec(), translator);
    final BufferedDataTable predicted = exec.createColumnRearrangeTable(inputTable, rearranger, exec);
    return new BufferedDataTable[]{predicted};
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class MissingValueHandling3Table method createMissingValueHandlingTable.
/**
 * Legacy {@link StringBuffer} variant of the missing value handling entry point. It delegates to the
 * {@link StringBuilder} based overload and afterwards appends all collected warnings to the given buffer.
 *
 * @param table the table to do missing value handling on
 * @param colSettings the settings, one entry per column setting
 * @param exec for progress/cancel and to create the buffered data table
 * @param warningBuffer receives the warning messages collected by the delegate; nothing is appended when the
 *            delegate throws
 * @return the table returned by the delegate
 * @throws CanceledExecutionException if canceled
 * @since 2.8
 * @deprecated use {@link #createMissingValueHandlingTable(BufferedDataTable, MissingValueHandling2ColSetting[],
 *             ExecutionContext, StringBuilder)} instead
 */
@Deprecated
public static BufferedDataTable createMissingValueHandlingTable(final BufferedDataTable table,
    final MissingValueHandling2ColSetting[] colSettings, final ExecutionContext exec,
    final StringBuffer warningBuffer) throws CanceledExecutionException {
    // a StringBuilder argument makes the call below resolve to the non-deprecated overload
    final StringBuilder collectedWarnings = new StringBuilder();
    final BufferedDataTable handledTable =
        createMissingValueHandlingTable(table, colSettings, exec, collectedWarnings);
    warningBuffer.append(collectedWarnings);
    return handledTable;
}
use of org.knime.core.node.BufferedDataTable in project knime-core by knime.
the class PortObjectRepository method copy.
/**
 * Copies the argument object. A {@link BufferedDataTable} is copied row by row into a new container created on
 * the given execution context, with every {@link BlobDataCell} cloned individually; any other port object is
 * copied via {@code Node.copyPortObject}.
 *
 * @param object The port object to be copied.
 * @param exec Host for BDTs being created
 * @param progress For progress/cancelation; receives one progress update per copied row
 * @return The deep copy.
 * @throws IOException In case of exceptions while accessing the streams
 * @throws CanceledExecutionException If canceled.
 */
public static final PortObject copy(final PortObject object, final ExecutionContext exec,
    final ExecutionMonitor progress) throws IOException, CanceledExecutionException {
    if (!(object instanceof BufferedDataTable)) {
        return Node.copyPortObject(object, exec);
    }
    // need to copy the table cell by cell
    // this is to workaround the standard knime philosophy according
    // to which tables are referenced. A row-based copy will not work
    // as it still will reference blobs
    final BufferedDataTable in = (BufferedDataTable)object;
    final BufferedDataContainer con = exec.createDataContainer(in.getSpec(), true, 0);
    final long rowCount = in.size();
    long row = 0;
    boolean hasLoggedCloneProblem = false;
    for (DataRow r : in) {
        final DataCell[] cells = new DataCell[r.getNumCells()];
        for (int i = 0; i < cells.length; i++) {
            // deserialize blob
            DataCell c = r.getCell(i);
            if (c instanceof BlobDataCell) {
                try {
                    c = cloneBlobCell(c);
                } catch (Exception e) {
                    // best effort: keep the original cell and warn only once to avoid flooding the log
                    if (!hasLoggedCloneProblem) {
                        LOGGER.warn("Can't clone blob object: " + e.getMessage(), e);
                        hasLoggedCloneProblem = true;
                        LOGGER.debug("Suppressing futher warnings.");
                    }
                }
            }
            cells[i] = c;
        }
        con.addRowToTable(new DefaultRow(r.getKey(), cells));
        // count the row before reporting: the first copied row reads "1/N" and the last one reaches N/N (1.0);
        // previously the counter lagged by one, starting at "0/N" and never reporting completion
        row++;
        progress.setProgress(row / (double)rowCount, "Copied row " + row + "/" + rowCount);
        progress.checkCanceled();
    }
    con.close();
    return con.getTable();
}
Aggregations