Usage of org.knime.core.node.BufferedDataTable in the knime-core project (KNIME): class EnrichmentPlotterModel, method execute.
/**
 * Builds one enrichment curve per configured curve and fills two output
 * tables: one row per curve holding the normalized area under the curve, and
 * one row per curve holding the discovery rates at the fixed
 * {@code DISCRATE_POINTS} percentages.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
// total row count; used only for progress reporting below
final double rowCount = inData[0].size();
final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
for (int i = 0; i < m_settings.getCurveCount(); i++) {
// each curve gets an equal share of the overall progress
final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
exec.setMessage("Generating curve " + (i + 1));
final Curve c = m_settings.getCurve(i);
// (sortValue, activityCell) pairs; sized for the worst case of no missing
// sort values (checkRowCount rejects tables too large for an int index)
final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
// k counts all rows (progress/cancel checks); maxK counts only rows with a
// non-missing sort value, i.e. the filled prefix of 'curve'
int k = 0, maxK = 0;
for (DataRow row : inData[0]) {
DataCell c1 = row.getCell(sortIndex);
DataCell c2 = row.getCell(actIndex);
if (k++ % 100 == 0) {
sexec.checkCanceled();
sexec.setProgress(k / rowCount);
}
// rows with a missing sort value are skipped entirely; a missing
// activity cell is kept and handled per plot mode further down
if (c1.isMissing()) {
continue;
} else {
curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
}
maxK++;
}
// sort the filled prefix ascending by sort value (Helper is assumed to
// compare on its double component — TODO confirm against Helper's compareTo)
Arrays.sort(curve, 0, maxK);
if (c.isSortDescending()) {
// reverse the sorted prefix in place to get descending order
for (int j = 0; j < maxK / 2; j++) {
Helper h = curve[j];
curve[j] = curve[maxK - j - 1];
curve[maxK - j - 1] = h;
}
}
// this is for down-sampling so that the view is faster;
// plotting >100,000 points takes quite a long time
final int size = Math.min(MAX_RESOLUTION, maxK);
final double downSampleRate = maxK / (double) size;
// +1 so the exact end point (maxK, final y) can be appended after the loop
final double[] xValues = new double[size + 1];
final double[] yValues = new double[size + 1];
xValues[0] = 0;
yValues[0] = 0;
int lastK = 0;
// y = running enrichment value, area = running (unnormalized) area under curve
double y = 0, area = 0;
int nextHitRatePoint = 0;
final double[] hitRateValues = new double[DISCRATE_POINTS.length];
// cluster mode only: occurrence counter per distinct activity value
final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
for (k = 1; k <= maxK; k++) {
final Helper h = curve[k - 1];
if (m_settings.plotMode() == PlotMode.PlotSum) {
// sum mode: accumulate the raw activity values
y += ((DoubleValue) h.b).getDoubleValue();
} else if (m_settings.plotMode() == PlotMode.PlotHits) {
// hit mode: count rows whose activity reaches the threshold
if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
y++;
}
} else if (!h.b.isMissing()) {
// cluster mode: count a cluster once its member count reaches the minimum
MutableInteger count = clusters.get(h.b);
if (count == null) {
count = new MutableInteger(0);
clusters.put(h.b, count);
}
if (count.inc() == m_settings.minClusterMembers()) {
y++;
}
}
// accumulate area with the x axis normalized to [0, 1]
area += y / maxK;
// emit a down-sampled point whenever the next sample boundary is crossed
if ((int) (k / downSampleRate) >= lastK + 1) {
lastK++;
xValues[lastK] = k;
yValues[lastK] = y;
}
// record y at each configured discovery-rate percentage of the row count
if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
hitRateValues[nextHitRatePoint] = y;
nextHitRatePoint++;
}
}
// always close the curve with the exact final point
xValues[xValues.length - 1] = maxK;
yValues[yValues.length - 1] = y;
// normalize the area by the final enrichment value
// NOTE(review): if y is still 0 here (no hits at all) this yields NaN for the
// area and the hit rates below — confirm downstream consumers tolerate that
area /= y;
m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
// normalize discovery rates to fractions of the final enrichment value
for (int j = 0; j < hitRateValues.length; j++) {
hitRateValues[j] /= y;
}
discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
}
areaOutCont.close();
discrateOutCont.close();
return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
Usage of org.knime.core.node.BufferedDataTable in the knime-core project (KNIME): class CollectionSplitNodeModel, method execute.
/**
 * Splits the collection column into individual columns. Depending on the
 * configured policy the output column specs are either derived from the
 * collection's element names or determined by a counting pre-scan of the
 * table; the actual split is then performed in a single column-rearrange
 * pass. Optionally a second pass narrows the new columns to their most
 * specific data type.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = inData[0];
    final DataTableSpec spec = table.getDataTableSpec();
    // progress monitor for the column-rearrange pass; replaced by a 70%
    // sub-progress whenever a counting pre-scan consumes the other 30%
    ExecutionMonitor execForCR = exec;
    // validate settings (fails fast if the target column is missing/invalid)
    getTargetColIndex(spec);
    DataColumnSpec[] colSpecs;
    switch (m_settings.getCountElementsPolicy()) {
        case Count: {
            // pre-scan the table to count the number of new columns;
            // the monitor is scoped to this case only (was previously shared
            // across cases, which is fragile switch-scope reuse)
            execForCR = exec.createSubProgress(0.7);
            final ExecutionMonitor countExec = exec.createSubProgress(0.3);
            colSpecs = countNewColumns(table, countExec);
            break;
        }
        case UseElementNamesOrFail:
            colSpecs = getColSpecsByElementNames(spec);
            break;
        case BestEffort:
            try {
                colSpecs = getColSpecsByElementNames(spec);
            } catch (InvalidSettingsException ise) {
                // no usable element names -- fall back to the counting pre-scan
                execForCR = exec.createSubProgress(0.7);
                final ExecutionMonitor countExec = exec.createSubProgress(0.3);
                colSpecs = countNewColumns(table, countExec);
            }
            break;
        default:
            throw new InvalidSettingsException("Unsupported policy: " + m_settings.getCountElementsPolicy());
    }
    Pair<ColumnRearranger, SplitCellFactory> pair = createColumnRearranger(spec, colSpecs);
    BufferedDataTable out = exec.createColumnRearrangeTable(table, pair.getFirst(), execForCR);
    String warnMessage = pair.getSecond().getWarnMessage();
    if (warnMessage != null) {
        setWarningMessage(warnMessage);
    }
    if (m_settings.isDetermineMostSpecificDataType()) {
        // second pass: narrow the freshly created columns to their most specific type
        out = refineTypes(out, pair.getSecond(), exec);
    }
    return new BufferedDataTable[] { out };
}
Usage of org.knime.core.node.BufferedDataTable in the knime-core project (KNIME): class RuleEngineNodeModel, method execute.
/**
 * Evaluates the configured rules against each input row and produces the
 * output table via a {@link ColumnRearranger} built from the parsed rules.
 *
 * {@inheritDoc}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final BufferedDataTable table = inData[0];
    final DataTableSpec inSpec = table.getDataTableSpec();
    // Parse the rule strings against the input spec, then build the
    // rearranger that applies them row by row.
    final List<Rule> parsedRules = parseRules(inSpec);
    final ColumnRearranger rearranger = createRearranger(inSpec, parsedRules);
    return new BufferedDataTable[] { exec.createColumnRearrangeTable(table, rearranger, exec) };
}
Usage of org.knime.core.node.BufferedDataTable in the knime-core project (KNIME): class LogRegLearner, method recalcDomainForTargetAndLearningFields.
/**
 * Recalculates the domains of the target column and of the learning columns
 * (nominal domains are created, numeric learning domains are dropped and
 * recreated so constant columns can be detected), attaches the resulting spec
 * to the table, collects nominal learning columns whose domain could not be
 * computed (too many distinct values) so they can be ignored during training,
 * and finally (re-)initializes the learner with the adjusted spec.
 *
 * @param data the input table
 * @param inPMMLSpec the incoming PMML spec (may describe preprocessing)
 * @param exec used for progress reporting and cancellation during the scan
 * @return the input table with the recalculated domains in its spec
 * @throws InvalidSettingsException if the target column has too many unique values
 * @throws CanceledExecutionException if the user cancels the domain scan
 */
private BufferedDataTable recalcDomainForTargetAndLearningFields(final BufferedDataTable data, final PMMLPortObjectSpec inPMMLSpec, final ExecutionContext exec) throws InvalidSettingsException, CanceledExecutionException {
    final String targetCol = m_pmmlOutSpec.getTargetFields().get(0);
    DataTableDomainCreator domainCreator = new DataTableDomainCreator(data.getDataTableSpec(), new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            return false;
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            // (re-)create domains for the target column and all nominal learning columns
            return colSpec.getName().equals(targetCol) || (colSpec.getType().isCompatible(NominalValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName()));
        }
    }, new DomainCreatorColumnSelection() {
        @Override
        public boolean dropDomain(final DataColumnSpec colSpec) {
            // drop domain of numeric learning fields so that we can check for constant columns
            return colSpec.getType().isCompatible(DoubleValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }

        @Override
        public boolean createDomain(final DataColumnSpec colSpec) {
            return colSpec.getType().isCompatible(DoubleValue.class) && m_pmmlOutSpec.getLearningFields().contains(colSpec.getName());
        }
    });
    // single scan over the data that computes all requested domains
    domainCreator.updateDomain(data, exec);
    DataTableSpec spec = domainCreator.createSpec();
    // fixed typo in the user-facing message: "calucator" -> "calculator"
    CheckUtils.checkSetting(spec.getColumnSpec(targetCol).getDomain().hasValues(), "Target column '%s' has too many" + " unique values - consider to use domain calculator node before to enforce calculation", targetCol);
    BufferedDataTable newDataTable = exec.createSpecReplacerTable(data, spec);
    // bug fix 5580 - ignore columns with too many different values
    Set<String> columnWithTooManyDomainValues = new LinkedHashSet<>();
    for (String learningField : m_pmmlOutSpec.getLearningFields()) {
        DataColumnSpec columnSpec = spec.getColumnSpec(learningField);
        // a nominal column without possible values means the domain computation gave up
        if (columnSpec.getType().isCompatible(NominalValue.class) && !columnSpec.getDomain().hasValues()) {
            columnWithTooManyDomainValues.add(learningField);
        }
    }
    if (!columnWithTooManyDomainValues.isEmpty()) {
        StringBuilder warning = new StringBuilder();
        warning.append(columnWithTooManyDomainValues.size() == 1 ? "Column " : "Columns ");
        warning.append(ConvenienceMethods.getShortStringFrom(columnWithTooManyDomainValues, 5));
        warning.append(columnWithTooManyDomainValues.size() == 1 ? " has " : " have ");
        warning.append("too many different values - will be ignored during training ");
        warning.append("(enforce inclusion by using a domain calculator node before)");
        LOGGER.warn(warning.toString());
        m_warningMessage = (m_warningMessage == null ? "" : m_warningMessage + "\n") + warning.toString();
    }
    // initialize m_learner so that it has the correct DataTableSpec of the input
    init(newDataTable.getDataTableSpec(), inPMMLSpec, columnWithTooManyDomainValues);
    return newDataTable;
}
Usage of org.knime.core.node.BufferedDataTable in the knime-core project (KNIME): class LogRegLearnerNodeModel, method execute.
/**
 * Learns the logistic regression model on the input table and returns the
 * model as a PMML port object together with the table port created from the
 * learned content.
 *
 * {@inheritDoc}
 */
@Override
protected PortObject[] execute(final PortObject[] inObjects, final ExecutionContext exec) throws Exception {
final BufferedDataTable data = (BufferedDataTable) inObjects[0];
DataTableSpec tableSpec = data.getDataTableSpec();
// handle the optional PMML input
PMMLPortObject inPMMLPort = m_pmmlInEnabled ? (PMMLPortObject) inObjects[1] : null;
PMMLPortObjectSpec inPMMLSpec = null;
if (inPMMLPort != null) {
inPMMLSpec = inPMMLPort.getSpec();
} else {
// no PMML input connected/enabled: start from an empty spec derived from the table
PMMLPortObjectSpecCreator creator = new PMMLPortObjectSpecCreator(tableSpec);
inPMMLSpec = creator.createSpec();
inPMMLPort = new PMMLPortObject(inPMMLSpec);
}
LogRegLearner learner = new LogRegLearner(new PortObjectSpec[] { tableSpec, inPMMLSpec }, m_pmmlInEnabled, m_settings);
// run the actual learning; m_content keeps the result (also used by the view)
m_content = learner.execute(new PortObject[] { data, inPMMLPort }, exec);
// forward any warning the learner produced (e.g. ignored columns)
String warn = learner.getWarningMessage();
if (warn != null) {
setWarningMessage(warn);
}
// third argument is ignored since we provide a port
PMMLPortObject outPMMLPort = new PMMLPortObject((PMMLPortObjectSpec) learner.getOutputSpec()[0], inPMMLPort, null);
// translate the learned coefficients into a PMML general regression model
PMMLGeneralRegressionTranslator trans = new PMMLGeneralRegressionTranslator(m_content.createGeneralRegressionContent());
outPMMLPort.addModelTranslater(trans);
return new PortObject[] { outPMMLPort, m_content.createTablePortObject(exec) };
}
Aggregations