use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class AutoBinner method calcDomainBoundsIfNeccessary.
/**
 * Determines the per column min/max values of the given data for every requested column whose
 * domain does not already carry bounds.
 *
 * <p>Missing cells and NaN values are ignored while scanning. Columns that already have bounds
 * keep their spec untouched. A scanned column without any usable value (e.g. an empty table) ends
 * up with the neutral bounds {@code [+Infinity, -Infinity]}.
 *
 * @param data the data
 * @param exec the execution context, used for cancellation checks, progress reporting and for
 *            creating the spec-replaced result table
 * @param recalcValuesFor names of the columns that need bounds; if {@code null} or empty the input
 *            table is returned as is
 * @return the input table itself if no bounds had to be computed, otherwise the same data with
 *         extended domain information
 * @throws InvalidSettingsException if a requested column does not exist or is not numeric
 * @throws CanceledExecutionException if the execution is canceled while scanning the rows
 */
public BufferedDataTable calcDomainBoundsIfNeccessary(final BufferedDataTable data, final ExecutionContext exec, final List<String> recalcValuesFor) throws InvalidSettingsException, CanceledExecutionException {
    if (null == recalcValuesFor || recalcValuesFor.isEmpty()) {
        return data;
    }
    final DataTableSpec inSpec = data.getDataTableSpec();
    // Indices of the requested columns that still lack domain bounds.
    List<Integer> valuesI = new ArrayList<Integer>();
    for (String colName : recalcValuesFor) {
        DataColumnSpec colSpec = inSpec.getColumnSpec(colName);
        if (colSpec == null) {
            // Report an unknown column as a settings problem instead of failing with a NullPointerException.
            throw new InvalidSettingsException("The column \"" + colName + "\" does not exist in the input table.");
        }
        if (!colSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Can only process numeric " + "data. The column \"" + colSpec.getName() + "\" is not numeric.");
        }
        if (!colSpec.getDomain().hasBounds()) {
            valuesI.add(inSpec.findColumnIndex(colName));
        }
    }
    if (valuesI.isEmpty()) {
        return data;
    }
    Map<Integer, Double> min = new HashMap<Integer, Double>();
    Map<Integer, Double> max = new HashMap<Integer, Double>();
    for (int col : valuesI) {
        // Neutral elements of min/max. Note that Double.MIN_VALUE is the smallest POSITIVE double
        // and therefore must not be used as the start value of a maximum search.
        min.put(col, Double.POSITIVE_INFINITY);
        max.put(col, Double.NEGATIVE_INFINITY);
    }
    int c = 0;
    for (DataRow row : data) {
        c++;
        exec.checkCanceled();
        exec.setProgress(c / (double) data.getRowCount());
        for (int col : valuesI) {
            if (row.getCell(col).isMissing()) {
                // A missing cell carries no value and cannot be cast to DoubleValue.
                continue;
            }
            double val = ((DoubleValue) row.getCell(col)).getDoubleValue();
            // NaN fails both comparisons below and is thereby ignored.
            if (min.get(col) > val) {
                min.put(col, val);
            }
            if (max.get(col) < val) {
                max.put(col, val);
            }
        }
    }
    List<DataColumnSpec> newColSpecList = new ArrayList<DataColumnSpec>();
    int cc = 0;
    for (DataColumnSpec columnSpec : inSpec) {
        // Only columns that were actually scanned have entries in min/max; requested columns that
        // already had bounds keep their existing spec.
        if (min.containsKey(cc)) {
            DataColumnSpecCreator specCreator = new DataColumnSpecCreator(columnSpec);
            DataColumnDomainCreator domainCreator = new DataColumnDomainCreator(new DoubleCell(min.get(cc)), new DoubleCell(max.get(cc)));
            specCreator.setDomain(domainCreator.createDomain());
            newColSpecList.add(specCreator.createSpec());
        } else {
            newColSpecList.add(columnSpec);
        }
        cc++;
    }
    DataTableSpec spec = new DataTableSpec(newColSpecList.toArray(new DataColumnSpec[0]));
    return exec.createSpecReplacerTable(data, spec);
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class EnrichmentPlotterModel method execute.
/**
 * {@inheritDoc}
 *
 * <p>For every curve configured in the settings, the rows of the first input table are ordered by
 * the curve's sort column (rows with a missing sort value are dropped), the activity column is
 * accumulated according to the configured plot mode, and the resulting curve is down-sampled to at
 * most {@code MAX_RESOLUTION} points. One row per curve is written to the area table and to the
 * discovery-rate table.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec) throws Exception {
    final double rowCount = inData[0].size();
    final BufferedDataContainer areaOutCont = exec.createDataContainer(AREA_OUT_SPEC);
    final BufferedDataContainer discrateOutCont = exec.createDataContainer(DISCRATE_OUT_SPEC);
    for (int i = 0; i < m_settings.getCurveCount(); i++) {
        // Every curve gets an equal share of the overall progress.
        final ExecutionMonitor sexec = exec.createSubProgress(1.0 / m_settings.getCurveCount());
        exec.setMessage("Generating curve " + (i + 1));
        final Curve c = m_settings.getCurve(i);
        final Helper[] curve = new Helper[KnowsRowCountTable.checkRowCount(inData[0].size())];
        final int sortIndex = inData[0].getDataTableSpec().findColumnIndex(c.getSortColumn());
        final int actIndex = inData[0].getDataTableSpec().findColumnIndex(c.getActivityColumn());
        // k counts all rows read, maxK only those with a non-missing sort value.
        int k = 0, maxK = 0;
        for (DataRow row : inData[0]) {
            DataCell c1 = row.getCell(sortIndex);
            DataCell c2 = row.getCell(actIndex);
            // Check for cancellation and report progress only every 100th row.
            if (k++ % 100 == 0) {
                sexec.checkCanceled();
                sexec.setProgress(k / rowCount);
            }
            if (c1.isMissing()) {
                continue;
            } else {
                curve[maxK] = new Helper(((DoubleValue) c1).getDoubleValue(), c2);
            }
            maxK++;
        }
        // Sort the filled prefix by Helper's natural ordering; reverse it in place for descending order.
        Arrays.sort(curve, 0, maxK);
        if (c.isSortDescending()) {
            for (int j = 0; j < maxK / 2; j++) {
                Helper h = curve[j];
                curve[j] = curve[maxK - j - 1];
                curve[maxK - j - 1] = h;
            }
        }
        // this is for down-sampling so that the view is faster;
        // plotting >100,000 points takes quite a long time
        final int size = Math.min(MAX_RESOLUTION, maxK);
        final double downSampleRate = maxK / (double) size;
        final double[] xValues = new double[size + 1];
        final double[] yValues = new double[size + 1];
        xValues[0] = 0;
        yValues[0] = 0;
        int lastK = 0;
        double y = 0, area = 0;
        int nextHitRatePoint = 0;
        final double[] hitRateValues = new double[DISCRATE_POINTS.length];
        final HashMap<DataCell, MutableInteger> clusters = new HashMap<DataCell, MutableInteger>();
        for (k = 1; k <= maxK; k++) {
            final Helper h = curve[k - 1];
            if (m_settings.plotMode() == PlotMode.PlotSum) {
                // A missing activity value cannot be cast to DoubleValue; it contributes nothing
                // to the sum, in line with how the other plot modes skip missing activity cells.
                if (!h.b.isMissing()) {
                    y += ((DoubleValue) h.b).getDoubleValue();
                }
            } else if (m_settings.plotMode() == PlotMode.PlotHits) {
                if (!h.b.isMissing() && (((DoubleValue) h.b).getDoubleValue() >= m_settings.hitThreshold())) {
                    y++;
                }
            } else if (!h.b.isMissing()) {
                // Remaining plot mode: count members per distinct activity cell; a cluster counts
                // once, at the moment it reaches the configured minimum number of members.
                MutableInteger count = clusters.get(h.b);
                if (count == null) {
                    count = new MutableInteger(0);
                    clusters.put(h.b, count);
                }
                if (count.inc() == m_settings.minClusterMembers()) {
                    y++;
                }
            }
            area += y / maxK;
            // Record a sample whenever k crosses into the next down-sampling bucket.
            if ((int) (k / downSampleRate) >= lastK + 1) {
                lastK++;
                xValues[lastK] = k;
                yValues[lastK] = y;
            }
            // NOTE(review): this only matches when k equals the floored position exactly; a
            // position of 0 or two points flooring to the same k would stall the remaining
            // points at 0 - confirm against the values in DISCRATE_POINTS.
            if ((nextHitRatePoint < DISCRATE_POINTS.length) && (k == (int) Math.floor(maxK * DISCRATE_POINTS[nextHitRatePoint] / 100))) {
                hitRateValues[nextHitRatePoint] = y;
                nextHitRatePoint++;
            }
        }
        // The last sample always is the end point of the curve.
        xValues[xValues.length - 1] = maxK;
        yValues[yValues.length - 1] = y;
        // Normalize by the final y value; if that is 0 the divisions yield NaN or an infinite value.
        area /= y;
        m_curves.add(new EnrichmentPlot(c.getSortColumn() + " vs " + c.getActivityColumn(), xValues, yValues, area));
        areaOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), new DoubleCell(area)));
        for (int j = 0; j < hitRateValues.length; j++) {
            hitRateValues[j] /= y;
        }
        discrateOutCont.addRowToTable(new DefaultRow(new RowKey(c.toString()), hitRateValues));
    }
    areaOutCont.close();
    discrateOutCont.close();
    return new BufferedDataTable[] { areaOutCont.getTable(), discrateOutCont.getTable() };
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class PolyRegLearnerNodeModel method getCellFactory.
/**
 * Creates a cell factory that appends the polynomial regression prediction and the absolute
 * prediction error to every row. As a side effect each processed row adds its squared error to
 * {@code m_squaredError}.
 *
 * @param dependentIndex index of the dependent (target) column
 * @return the factory producing the two appended cells per row
 */
private CellFactory getCellFactory(final int dependentIndex) {
    final int maxDegree = m_settings.getDegree();
    return new CellFactory() {
        @Override
        public DataCell[] getCells(final DataRow row) {
            // m_betas[0] is the constant term; the following coefficients are consumed in column
            // order, one per power 1..maxDegree of each selected column.
            double prediction = m_betas[0];
            double observed = 0;
            int betaIdx = 1;
            final int cellCount = row.getNumCells();
            for (int col = 0; col < cellCount; col++) {
                if (col == dependentIndex) {
                    observed = ((DoubleValue) row.getCell(col)).getDoubleValue();
                } else if (m_colSelected[col]) {
                    final double x = ((DoubleValue) row.getCell(col)).getDoubleValue();
                    double power = 1;
                    for (int exponent = 1; exponent <= maxDegree; exponent++) {
                        power *= x;
                        prediction += m_betas[betaIdx] * power;
                        betaIdx++;
                    }
                }
            }
            final double absError = Math.abs(prediction - observed);
            m_squaredError += absError * absError;
            return new DataCell[] { new DoubleCell(prediction), new DoubleCell(absError) };
        }

        @Override
        public DataColumnSpec[] getColumnSpecs() {
            final DataColumnSpec predictionSpec = new DataColumnSpecCreator("PolyReg prediction", DoubleCell.TYPE).createSpec();
            final DataColumnSpec errorSpec = new DataColumnSpecCreator("Prediction Error", DoubleCell.TYPE).createSpec();
            return new DataColumnSpec[] { predictionSpec, errorSpec };
        }

        @Override
        public void setProgress(final int curRowNr, final int rowCount, final RowKey lastKey, final ExecutionMonitor execMon) {
            // intentionally empty: this factory does not report progress
        }
    };
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class RegressionPredictorNodeModel method createRearranger.
/**
 * Builds a column rearranger that appends one double column holding the regression prediction.
 *
 * @param inSpec spec of the table to predict on
 * @param regModelSpec spec of the regression model; must not be {@code null}
 * @param regModel the regression model; if {@code null} the regressor names are taken from
 *            {@code regModelSpec}
 * @return the rearranger with the appended prediction column
 * @throws InvalidSettingsException if the model spec is {@code null}, a regressor column is
 *             missing in {@code inSpec}, or a regressor column is not numeric
 */
private ColumnRearranger createRearranger(final DataTableSpec inSpec, final PMMLPortObjectSpec regModelSpec, final PMMLRegressionTranslator regModel) throws InvalidSettingsException {
    if (regModelSpec == null) {
        throw new InvalidSettingsException("No input");
    }
    // The first target field names the response column; "Response" is used if there is none.
    String responseName = "Response";
    for (String target : regModelSpec.getTargetFields()) {
        responseName = target;
        break;
    }
    final List<String> regressorNames;
    if (regModel == null) {
        regressorNames = new ArrayList<String>(regModelSpec.getLearningFields());
    } else {
        regressorNames = new ArrayList<String>();
        for (NumericPredictor predictor : regModel.getRegressionTable().getVariables()) {
            regressorNames.add(predictor.getName());
        }
    }
    // Resolve every regressor to a numeric column of the input table.
    final int[] colIndices = new int[regressorNames.size()];
    int next = 0;
    for (String regressorName : regressorNames) {
        final int colIndex = inSpec.findColumnIndex(regressorName);
        if (colIndex < 0) {
            throw new InvalidSettingsException("Missing column for regressor variable : \"" + regressorName + "\"");
        }
        final DataColumnSpec regressorSpec = inSpec.getColumnSpec(colIndex);
        if (!regressorSpec.getType().isCompatible(DoubleValue.class)) {
            throw new InvalidSettingsException("Incompatible type of column \"" + regressorSpec.getName() + "\": " + regressorSpec.getType());
        }
        colIndices[next] = colIndex;
        next++;
    }
    // try to use some smart naming scheme for the append column
    String suggestedName = responseName;
    final boolean hasPredictionSuffix = suggestedName.toLowerCase().endsWith("(prediction)");
    if (inSpec.containsName(suggestedName) && !hasPredictionSuffix) {
        suggestedName = suggestedName + " (prediction)";
    }
    final String appendedName = DataTableSpec.getUniqueColumnName(inSpec, suggestedName);
    final DataColumnSpec appendedSpec = new DataColumnSpecCreator(appendedName, DoubleCell.TYPE).createSpec();
    final SingleCellFactory predictionFactory = new SingleCellFactory(appendedSpec) {
        @Override
        public DataCell getCell(final DataRow row) {
            final RegressionTable table = regModel.getRegressionTable();
            double prediction = table.getIntercept();
            int pos = 0;
            for (NumericPredictor predictor : table.getVariables()) {
                final DataCell cell = row.getCell(colIndices[pos]);
                pos++;
                // A single missing regressor value makes the whole prediction missing.
                if (cell.isMissing()) {
                    return DataType.getMissingCell();
                }
                double term = ((DoubleValue) cell).getDoubleValue();
                if (predictor.getExponent() != 1) {
                    term = Math.pow(term, predictor.getExponent());
                }
                prediction += predictor.getCoefficient() * term;
            }
            return new DoubleCell(prediction);
        }
    };
    final ColumnRearranger rearranger = new ColumnRearranger(inSpec);
    rearranger.append(predictionFactory);
    return rearranger;
}
use of org.knime.core.data.DoubleValue in project knime-core by knime.
the class AbstractTreeEnsembleModel method createNominalNumericPredictorRecord.
/**
 * Converts a row into a {@link PredictorRecord} keyed by column name. Nominal values are mapped to
 * the integer assigned in the model's meta data, numeric values are stored as doubles. Missing
 * cells, nominal values unknown to the model and NaN are stored as {@code PredictorRecord.NULL}.
 *
 * @param filterRow the row to convert; its cells are read by the column positions of {@code trainSpec}
 * @param trainSpec the spec describing the columns of {@code filterRow}
 * @return the predictor record for the row
 * @throws IllegalStateException if a non-missing cell belongs to a column that is neither nominal
 *             nor numeric
 */
private PredictorRecord createNominalNumericPredictorRecord(final DataRow filterRow, final DataTableSpec trainSpec) {
    final int columnCount = trainSpec.getNumColumns();
    // Sized so that all columns fit without rehashing at the default load factor of 0.75.
    final int initialCapacity = (int) (columnCount / 0.75 + 1.0);
    final Map<String, Object> record = new LinkedHashMap<String, Object>(initialCapacity);
    for (int idx = 0; idx < columnCount; idx++) {
        final DataColumnSpec columnSpec = trainSpec.getColumnSpec(idx);
        final String name = columnSpec.getName();
        final DataType type = columnSpec.getType();
        final DataCell cell = filterRow.getCell(idx);
        if (cell.isMissing()) {
            record.put(name, PredictorRecord.NULL);
            continue;
        }
        if (type.isCompatible(NominalValue.class)) {
            final TreeNominalColumnMetaData nominalMeta = (TreeNominalColumnMetaData) m_metaData.getAttributeMetaData(idx);
            final NominalValueRepresentation[] knownValues = nominalMeta.getValues();
            final String text = cell.toString();
            // -1 marks "value not known to the model"
            int code = -1;
            for (NominalValueRepresentation known : knownValues) {
                if (known.getNominalValue().equals(text)) {
                    code = known.getAssignedInteger();
                    break;
                }
            }
            if (code != -1) {
                record.put(name, Integer.valueOf(code));
            } else {
                // unknown values are treated as missing
                record.put(name, PredictorRecord.NULL);
            }
            continue;
        }
        if (!type.isCompatible(DoubleValue.class)) {
            throw new IllegalStateException("Expected nominal or numeric column type for column \"" + name + "\" but got \"" + type + "\"");
        }
        final double number = ((DoubleValue) cell).getDoubleValue();
        if (Double.isNaN(number)) {
            // make sure that NaNs are treated as missing values
            // bug AP-7169
            record.put(name, PredictorRecord.NULL);
        } else {
            record.put(name, Double.valueOf(number));
        }
    }
    return new PredictorRecord(record);
}
Aggregations